
> ## run public-opinion scripts (Section 3)
> setwd(pcodedir); dir()
 [1] "01_group.R"                "02_prep.R"                 "03_summarize.R"           
 [4] "04_summarize_output.R"     "05_regmods.R"              "06_regmods_output.R"      
 [7] "07_predict.R"              "08_predict_output.R"       "09_extra_output.R"        
[10] "10_RandR_summarize.R"      "11_RandR_model.R"          "12_RandR_modeloutput.R"   
[13] "functions.R"               "getcode.R"                 "getinfo2.R"               
[16] "readpoll_functions.R"      "XX_predict.R"              "XX_predict_output.R"      
[19] "XX_predict_outputTrends.R" "XX_predictscribbles.R"     "XX_regmods.R"             
[22] "XX_regmods_output.R"       "XX_runall.R"               "XX_scanmeta.R"            
[25] "XXdirs.R"                 

> myfiles <- dir()[str_detect(dir(), '^[0-9]{2}')]

> for (myfile in myfiles) {
+   print("######")
+   print("Running:")
+   print(myfile)
+ 
+   pcodedir <- file.path(
+     find_root(criterion = has_ .... [TRUNCATED] 
[1] "######"
[1] "Running:"
[1] "01_group.R"

> #########################################################
> #########################################################
> 
> #clear workspace
> rm(list=ls())

> #load packages
> require(stringr)

> require(plyr)

> require(dplyr)

> require(zoo)

> require(data.table)

> require(tidyr)

> require(rprojroot)

> #set dirs
> rootdir<-find_root(
+   criterion=has_file('_rapol.Rproj')
+ )

> codedir<-file.path(rootdir,"code")

> setwd(codedir); dir()
[1] "01_publicopinion" "02_voting"        "03_dind"          "dirs.R"          
[5] "runeverything.R" 

> source('dirs.R')

> #helper functions
> setwd(pcodedir); dir()
 [1] "01_group.R"                "02_prep.R"                 "03_summarize.R"           
 [4] "04_summarize_output.R"     "05_regmods.R"              "06_regmods_output.R"      
 [7] "07_predict.R"              "08_predict_output.R"       "09_extra_output.R"        
[10] "10_RandR_summarize.R"      "11_RandR_model.R"          "12_RandR_modeloutput.R"   
[13] "functions.R"               "getcode.R"                 "getinfo2.R"               
[16] "readpoll_functions.R"      "XX_predict.R"              "XX_predict_output.R"      
[19] "XX_predict_outputTrends.R" "XX_predictscribbles.R"     "XX_regmods.R"             
[22] "XX_regmods_output.R"       "XX_runall.R"               "XX_scanmeta.R"            
[25] "XXdirs.R"                 

> source('readpoll_functions.R')
Loading required package: foreign
 

> source('getinfo2.R')

> #helper function for reading
> readme.csv<-function(x) {
+   df<-read.csv(x,stringsAsFactors=F)
+   df$X<-NULL
+   return(df)
+ }

> ############################################################# 
> ############################################################# 
> 
> #load all the df's
> setwd(datadir)

> abcdfs<-readme.csv('abcdfs.csv')

> anesdfs<-readme.csv('anesdf.csv')

> cbsdfs<-readme.csv('cbsdfs.csv')

> gallupdfs<-readme.csv('gallupdfs.csv')

> gssdfs<-readme.csv('gssdf.csv')

> nbclatdfs<-readme.csv('nbclatdfs.csv')

> roperdfs<-readme.csv('roperdfs.csv')

> timedfs<-readme.csv('timedfs.csv')

> allpolls<-list(
+   abcdfs,
+   anesdfs,
+   cbsdfs,
+   gallupdfs,
+   gssdfs,
+   nbclatdfs,
+   roperdfs,
+   timedfs
+ )

> names(allpolls)<-c(
+   "ABC",
+   "ANES",
+   "CBS",
+   "Gallup",
+   "GSS",
+   "NBCLAT",
+   "Roper",
+   "Time"
+ )

> ############################################################# 
> ############################################################# 
> 
> #ERROR-CHECKING
> 
> ############################################################# 
> ############################################################# 
> 
> #checking whether all vars have _n, _np, and _n variants
> # questions<-allcodes$name 
> # allnames<-sapply(allpolls,names) %>% 
> #   unlist %>% 
> #   unname %>% 
> #   unique
> # tmp<-lapply(seq_along(questions),function(k) {
> #   #k<-1
> #   pro<-questions[k]
> #   con<-getnegative(pro)
> #   neutral<-getneutral(pro)
> #   tmpchecker<-c(pro,con,neutral)
> #   present<-sapply(tmpchecker,function(x) x%in%allnames) %>%
> #     sum
> #   return(present)
> # }); names(tmp)<-questions
> 
> # if(sum(tmp<3)>0) 
> #   stop("Not all variants present.")
> 
> #check NA occurrence of all key variables
> blackNAs<-lapply(allpolls,function(poll) {
+   #poll<-abcdfs
+   tapply(poll$black,poll$id,function(x) {
+     100*sum(is.na(x))/length(x)
+   })
+ }) %>% unlist %>% sort

> blackNAs
                     ABC.ABC-1981 (df1981:AWP8846.DAT) 
                                            0.00000000 
                   ABC.ABC-1982 (df1982.5:Awp7598.dat) 
                                            0.00000000 
                     ABC.ABC-1982 (df1982:ABC7871.DAT) 
                                            0.00000000 
                      ABC.ABC-1995 (df1995:aw5514.dat) 
                                            0.00000000 
                                        ANES.ANES-1956 
                                            0.00000000 
                                        ANES.ANES-1958 
                                            0.00000000 
                                        ANES.ANES-1960 
                                            0.00000000 
                                        ANES.ANES-1964 
                                            0.00000000 
                                        ANES.ANES-1966 
                                            0.00000000 
                                        ANES.ANES-1968 
                                            0.00000000 
                                        ANES.ANES-1970 
                                            0.00000000 
                 Gallup.Gallup-1965 (df1965.5:709.dat) 
                                            0.00000000 
            Gallup.Gallup-1965 (df1965.7:AIPO0716.dat) 
                                            0.00000000 
                   Gallup.Gallup-1965 (df1965:704.dat) 
                                            0.00000000 
              Gallup.Gallup-1966 (df1966:aipo0729.dat) 
                                            0.00000000 
              Gallup.Gallup-1967 (df1967:aipo0746.dat) 
                                            0.00000000 
              Gallup.Gallup-1968 (df1968:aipo0757.dat) 
                                            0.00000000 
                 Gallup.Gallup-1969 (df1969.5:773.dat) 
                                            0.00000000 
            Gallup.Gallup-1969 (df1969.7:aipo6955.dat) 
                                            0.00000000 
Gallup.Gallup-1969 (df1969:aipo0774.dat,aipo0774S.dat) 
                                            0.00000000 
                   Gallup.Gallup-1971 (df1971:839.dat) 
                                            0.00000000 
                 Gallup.Gallup-1972 (df1972.5:860.dat) 
                                            0.00000000 
                 Gallup.Gallup-1972 (df1972.7:861.dat) 
                                            0.00000000 
                   Gallup.Gallup-1972 (df1972:846.dat) 
                                            0.00000000 
                   Gallup.Gallup-1976 (df1976:949.dat) 
                                            0.00000000 
                   Gallup.Gallup-1978 (df1978:995.dat) 
                                            0.00000000 
                  Gallup.Gallup-1981 (df1981:1168.dat) 
                                            0.00000000 
                Gallup.Gallup-1982 (df1982.5:1189.dat) 
                                            0.00000000 
              Gallup.Gallup-1982 (df1982:aipo1202.dat) 
                                            0.00000000 
                  Gallup.Gallup-1985 (df1985:1248.dat) 
                                            0.00000000 
                  Gallup.Gallup-1986 (df1986:1261.dat) 
                                            0.00000000 
               Gallup.Gallup-1990 (df1990:g922017.DAT) 
                                            0.00000000 
                                          GSS.GSS-1972 
                                            0.00000000 
                                          GSS.GSS-1973 
                                            0.00000000 
                                          GSS.GSS-1974 
                                            0.00000000 
                                          GSS.GSS-1975 
                                            0.00000000 
                                          GSS.GSS-1976 
                                            0.00000000 
                                          GSS.GSS-1977 
                                            0.00000000 
                                          GSS.GSS-1978 
                                            0.00000000 
                                          GSS.GSS-1980 
                                            0.00000000 
                                          GSS.GSS-1982 
                                            0.00000000 
                                          GSS.GSS-1983 
                                            0.00000000 
                                          GSS.GSS-1984 
                                            0.00000000 
                                          GSS.GSS-1985 
                                            0.00000000 
                                          GSS.GSS-1986 
                                            0.00000000 
                                          GSS.GSS-1987 
                                            0.00000000 
                                          GSS.GSS-1988 
                                            0.00000000 
                                          GSS.GSS-1989 
                                            0.00000000 
                                          GSS.GSS-1990 
                                            0.00000000 
                                          GSS.GSS-1991 
                                            0.00000000 
                                          GSS.GSS-1993 
                                            0.00000000 
                                          GSS.GSS-1994 
                                            0.00000000 
                                          GSS.GSS-1996 
                                            0.00000000 
                                          GSS.GSS-1998 
                                            0.00000000 
                                          GSS.GSS-2000 
                                            0.00000000 
                                          GSS.GSS-2002 
                                            0.00000000 
                                          GSS.GSS-2004 
                                            0.00000000 
                                          GSS.GSS-2006 
                                            0.00000000 
                                          GSS.GSS-2008 
                                            0.00000000 
                                          GSS.GSS-2010 
                                            0.00000000 
                                          GSS.GSS-2012 
                                            0.00000000 
                                          GSS.GSS-2014 
                                            0.00000000 
                   NBCLAT.LAT-1993 (df1993:Lat322.dat) 
                                            0.00000000 
                   NBCLAT.LAT-1994 (df1994:Lat328.dat) 
                                            0.00000000 
                 NBCLAT.LAT-1995 (df1995.5:LAT369.DAT) 
                                            0.00000000 
                Roper.Roper-1971 (df1971:rcom0524.dat) 
                                            0.00000000 
                Roper.Roper-1974 (df1974:rprr7409.dat) 
                                            0.00000000 
              Roper.Roper-1975 (df1975.5:rprr7501.dat) 
                                            0.00000000 
                Roper.Roper-1975 (df1975:rprr7509.dat) 
                                            0.00000000 
                Roper.Roper-1976 (df1976.5:rr7601.por) 
                                            0.00000000 
              Roper.Roper-1983 (df1983.5:rprr8301.dat) 
                                            0.00000000 
            Gallup.Gallup-1967 (df1967.5:aipo0749.dat) 
                                            0.02836075 
                                        ANES.ANES-1972 
                                            0.03696858 
                Roper.Roper-1983 (df1983:rprr8309.dat) 
                                            0.05000000 
                                        ANES.ANES-1990 
                                            0.05050505 
              Gallup.Gallup-1957 (df1957:aipo0588.dat) 
                                            0.06544503 
                   Gallup.Gallup-1960 (df1960:625.dat) 
                                            0.06700168 
                                        ANES.ANES-1962 
                                            0.07710100 
                                        ANES.ANES-1986 
                                            0.09191176 
               Gallup.Gallup-1991 (df1991:g222002.dat) 
                                            0.10101010 
                                        ANES.ANES-1980 
                                            0.12391574 
                Roper.Roper-1982 (df1982.5:rr8201.dat) 
                                            0.15090543 
                Roper.Roper-1984 (df1984:rprr8402.dat) 
                                            0.20000000 
                  Roper.Roper-1985 (df1985:rr8501.por) 
                                            0.20242915 
                Roper.Roper-1982 (df1982:rprr8209.dat) 
                                            0.25000000 
            Gallup.Gallup-1994 (df1994.95:A422039.DAT) 
                                            0.28957529 
             Gallup.Gallup-1991 (df1991.5:G122021.dat) 
                                            0.29910269 
                                        ANES.ANES-1988 
                                            0.34313725 
               Roper.Roper-1978 (df1978:ROPER7802.dat) 
                                            0.34965035 
              Roper.Roper-1984 (df1984.5:rprr8401.por) 
                                            0.35000000 
                                        ANES.ANES-1982 
                                            0.35260931 
                                        ANES.ANES-1984 
                                            0.39875942 
              Gallup.Gallup-1993 (df1993:g422017B.DAT) 
                                            0.40192926 
                                        ANES.ANES-1996 
                                            0.40840140 
              Roper.Roper-1981 (df1981.5:rprr8101.dat) 
                                            0.41407867 
                Roper.Roper-1978 (df1978.5:RR7801.DAT) 
                                            0.44977511 
                  Roper.Roper-1987 (df1987:rr8701.DAT) 
                                            0.45067601 
                                        ANES.ANES-2008 
                                            0.47372954 
                                        ANES.ANES-1978 
                                            0.47743056 
         Gallup.Gallup-2003 (df2003.5:2003_06_09x.por) 
                                            0.48590865 
                                        ANES.ANES-2012 
                                            0.49036185 
             Gallup.Gallup-1995 (df1995.7:9509012.DAT) 
                                            0.49455984 
              Gallup.Gallup-1985 (df1985.5:g85064.por) 
                                            0.49554014 
             Gallup.Gallup-1993 (df1993.5:A322052.DAT) 
                                            0.49850449 
               Gallup.Gallup-1995 (df1995:5001020.DAT) 
                                            0.50000000 
                Roper.Roper-1977 (df1977:ROPER771.DAT) 
                                            0.50000000 
             Gallup.Gallup-1992 (df1992.7:g322014.por) 
                                            0.50468637 
                  Roper.Roper-1986 (df1986:rr8601.dat) 
                                            0.50505051 
                Roper.Roper-1979 (df1979:rprr7909.dat) 
                                            0.51387461 
              Gallup.Gallup-1992 (df1992:life1992.dat) 
                                            0.57377049 
                                        ANES.ANES-2004 
                                            0.57755776 
                     CBS.CBS-1994 (df1994:CJUN94A.DAT) 
                                            0.61349693 
                    Time.Time-1994 (df1994:y94012.por) 
                                            0.61881188 
                                        ANES.ANES-1998 
                                            0.62451210 
               Gallup.Gallup-1999 (df1999:9902009.dat) 
                                            0.66413662 
                Roper.Roper-1980 (df1980:rprr8001.dat) 
                                            0.67567568 
                 NBCLAT.NBC-1995 (df1995:Nbcw4063.dat) 
                                            0.68259386 
             Gallup.Gallup-1999 (df1999.5:9903015.dat) 
                                            0.68560235 
               Time.Time-1989 (df1989.5:y19895406.dat) 
                                            0.69169960 
            Gallup.Gallup-1994 (df1994.9:A0807018.DAT) 
                                            0.69238378 
                                        ANES.ANES-2002 
                                            0.72799471 
                Roper.Roper-1976 (df1976:rprr7609.dat) 
                                            0.74925075 
                                        ANES.ANES-1974 
                                            0.76190476 
              Gallup.Gallup-2001 (df2001:g2001-06.por) 
                                            0.78740157 
                                        ANES.ANES-1954 
                                            0.79016681 
               Gallup.Gallup-2009 (df2009:g200911.por) 
                                            0.79129575 
              Gallup.Gallup-1998 (df1998:A9806017.DAT) 
                                            0.79760718 
                 Gallup.Gallup-1988 (df1988:Ai875.dat) 
                                            0.79920080 
                                        ANES.ANES-1976 
                                            0.80071174 
               Gallup.Gallup-2008 (df2008:g200824.por) 
                                            0.85158151 
              Roper.Roper-1979 (df1979.5:Roper791.dat) 
                                            0.87493567 
           Gallup.Gallup-2002 (df2002:2002_06_21x.por) 
                                            0.88235294 
                     ABC.ABC-2003 (df2003:abcw909.por) 
                                            0.88261253 
               Gallup.Gallup-1989 (df1989.5:89135.dat) 
                                            0.89068826 
                 NBCLAT.NBC-1985 (df1985:n1985may.por) 
                                            0.93926111 
               Gallup.Gallup-1995 (df1995.5:A018A.DAT) 
                                            0.99206349 
                    Time.Time-1995 (df1995:y95002.por) 
                                            1.00000000 
             Gallup.Gallup-2000 (df2000.5:g200008.dat) 
                                            1.05263158 
              Gallup.Gallup-1996 (df1996:A9605012.DAT) 
                                            1.07948970 
            Gallup.Gallup-1997 (df1997.5:g9707017.DAT) 
                                            1.09561753 
               Gallup.Gallup-1997 (df1997:9708018.dat) 
                                            1.09890110 
                       ABC.ABC-2005 (df2005:aw980.por) 
                                            1.10905730 
                                        ANES.ANES-1992 
                                            1.12676056 
              Gallup.Gallup-1989 (df1989:Oct89nii.dat) 
                                            1.13452188 
               Gallup.Gallup-2006 (df2006:g200618.dat) 
                                            1.18460020 
             Gallup.Gallup-2001 (df2001.5:a200121.dat) 
                                            1.18694362 
            Gallup.Gallup-1994 (df1994.5:A0807010.DAT) 
                                            1.21951220 
               Gallup.Gallup-2011 (df2011:g200111.por) 
                                            1.27450980 
            Gallup.Gallup-1999 (df1999.7:A9906032.dat) 
                                            1.27952756 
           Gallup.Gallup-2003 (df2003:2003_05_19x.dat) 
                                            1.28205128 
            Gallup.Gallup-1998 (df1998.5:A9810040.DAT) 
                                            1.28331688 
             Gallup.Gallup-2011 (df2011.5:g201121.por) 
                                            1.28458498 
                    CBS.CBS-2012 (df2012:c201207a.por) 
                                            1.28558310 
             Gallup.Gallup-2000 (df2000.9:g200025.dat) 
                                            1.37120470 
           Gallup.Gallup-2012 (df2012.7:usa201219.por) 
                                            1.37931034 
             Gallup.Gallup-2012 (df2012.5:g201208.por) 
                                            1.39442231 
                    Time.Time-1993 (df1993:y85212.dat) 
                                            1.40000000 
             Gallup.Gallup-1991 (df1991.7:g222006.por) 
                                            1.47904684 
           Gallup.Gallup-2013 (df2013.7:usa201222.dat) 
                                            1.54142582 
              Gallup.Gallup-1994 (df1994:A0807020.DAT) 
                                            1.56555773 
               Gallup.Gallup-2000 (df2000:a200026.dat) 
                                            1.66666667 
                Roper.Roper-1981 (df1981:rprr8102.dat) 
                                            1.67173252 
           Gallup.Gallup-2009 (df2009.5:usa200921.por) 
                                            1.76991150 
                   Time.Time-2001 (df2001:y200105.dat) 
                                            1.77339901 
               Gallup.Gallup-2004 (df2004:g200418.por) 
                                            1.79640719 
           Gallup.Gallup-2010 (df2010.5:usa201021.por) 
                                            1.83220829 
             Gallup.Gallup-2000 (df2000.7:a200010.dat) 
                                            1.85185185 
                     ABC.ABC-2005 (df2005.5:aw985.por) 
                                            1.89243028 
                        ABC.ABC-2006 (df2006:1015.por) 
                                            1.90000000 
                    Time.Time-1997 (df1997:y97006.por) 
                                            1.95312500 
             Gallup.Gallup-1992 (df1992.5:g205115.por) 
                                            1.97368421 
                                        ANES.ANES-1994 
                                            2.11699164 
                  Time.Time-1995 (df1995.5:y95015.por) 
                                            2.12500000 
             Gallup.Gallup-2006 (df2006.5:g200621.por) 
                                            2.19560878 
                                        ANES.ANES-2000 
                                            2.21361372 
              Roper.Roper-1974 (df1974.5:rprr7401.dat) 
                                            2.42937853 
               Gallup.Gallup-2010 (df2010:g201009.por) 
                                            2.45098039 
                                        ANES.ANES-1948 
                                            2.56797583 
               Gallup.Gallup-2005 (df2005:g200524.por) 
                                            2.58964143 
               Gallup.Gallup-2007 (df2007:a200721.por) 
                                            2.68123138 
                 Time.Time-2003 (df2003:time01_03.dat) 
                                            2.88028803 
                   Gallup.Gallup-1956 (df1956:562.dat) 
                                            3.05000000 
                Roper.Roper-1973 (df1973:rprr7309.dat) 
                                            3.24623911 
               Gallup.Gallup-2013 (df2013:g201307.por) 
                                            3.98953564 
               Gallup.Gallup-2014 (df2014:g201411.por) 
                                            4.07348243 
             Gallup.Gallup-2014 (df2014.5:g201407.por) 
                                            4.86854917 
              Gallup.Gallup-1955 (df1955:aipo0550.dat) 
                                            5.22548318 
                                        ANES.ANES-1952 
                                            5.47656661 
             Gallup.Gallup-2013 (df2013.5:g201321.dat) 
                                            5.81959263 
                 Time.Time-1989 (df1989:y19985415.por) 
                                           16.66666667 

> femaleNAs<-lapply(allpolls,function(poll) {
+   #poll<-abcdfs
+   tapply(poll$female,poll$id,function(x) {
+     100*sum(is.na(x))/length(x)
+   })
+ }) %>% unlist %>% sort

> femaleNAs
                     ABC.ABC-1981 (df1981:AWP8846.DAT) 
                                            0.00000000 
                   ABC.ABC-1982 (df1982.5:Awp7598.dat) 
                                            0.00000000 
                     ABC.ABC-1982 (df1982:ABC7871.DAT) 
                                            0.00000000 
                      ABC.ABC-1995 (df1995:aw5514.dat) 
                                            0.00000000 
                     ABC.ABC-2003 (df2003:abcw909.por) 
                                            0.00000000 
                     ABC.ABC-2005 (df2005.5:aw985.por) 
                                            0.00000000 
                       ABC.ABC-2005 (df2005:aw980.por) 
                                            0.00000000 
                        ABC.ABC-2006 (df2006:1015.por) 
                                            0.00000000 
                                        ANES.ANES-1954 
                                            0.00000000 
                                        ANES.ANES-1956 
                                            0.00000000 
                                        ANES.ANES-1958 
                                            0.00000000 
                                        ANES.ANES-1960 
                                            0.00000000 
                                        ANES.ANES-1962 
                                            0.00000000 
                                        ANES.ANES-1964 
                                            0.00000000 
                                        ANES.ANES-1966 
                                            0.00000000 
                                        ANES.ANES-1968 
                                            0.00000000 
                                        ANES.ANES-1970 
                                            0.00000000 
                                        ANES.ANES-1972 
                                            0.00000000 
                                        ANES.ANES-1974 
                                            0.00000000 
                                        ANES.ANES-1976 
                                            0.00000000 
                                        ANES.ANES-1978 
                                            0.00000000 
                                        ANES.ANES-1980 
                                            0.00000000 
                                        ANES.ANES-1982 
                                            0.00000000 
                                        ANES.ANES-1984 
                                            0.00000000 
                                        ANES.ANES-1986 
                                            0.00000000 
                                        ANES.ANES-1988 
                                            0.00000000 
                                        ANES.ANES-1990 
                                            0.00000000 
                                        ANES.ANES-1992 
                                            0.00000000 
                                        ANES.ANES-1994 
                                            0.00000000 
                                        ANES.ANES-1996 
                                            0.00000000 
                                        ANES.ANES-1998 
                                            0.00000000 
                                        ANES.ANES-2000 
                                            0.00000000 
                                        ANES.ANES-2002 
                                            0.00000000 
                                        ANES.ANES-2004 
                                            0.00000000 
                                        ANES.ANES-2008 
                                            0.00000000 
                                        ANES.ANES-2012 
                                            0.00000000 
                     CBS.CBS-1994 (df1994:CJUN94A.DAT) 
                                            0.00000000 
                    CBS.CBS-2012 (df2012:c201207a.por) 
                                            0.00000000 
              Gallup.Gallup-1955 (df1955:aipo0550.dat) 
                                            0.00000000 
                   Gallup.Gallup-1956 (df1956:562.dat) 
                                            0.00000000 
                 Gallup.Gallup-1965 (df1965.5:709.dat) 
                                            0.00000000 
            Gallup.Gallup-1965 (df1965.7:AIPO0716.dat) 
                                            0.00000000 
                   Gallup.Gallup-1965 (df1965:704.dat) 
                                            0.00000000 
              Gallup.Gallup-1966 (df1966:aipo0729.dat) 
                                            0.00000000 
              Gallup.Gallup-1967 (df1967:aipo0746.dat) 
                                            0.00000000 
              Gallup.Gallup-1968 (df1968:aipo0757.dat) 
                                            0.00000000 
                 Gallup.Gallup-1969 (df1969.5:773.dat) 
                                            0.00000000 
            Gallup.Gallup-1969 (df1969.7:aipo6955.dat) 
                                            0.00000000 
Gallup.Gallup-1969 (df1969:aipo0774.dat,aipo0774S.dat) 
                                            0.00000000 
                   Gallup.Gallup-1971 (df1971:839.dat) 
                                            0.00000000 
                 Gallup.Gallup-1972 (df1972.5:860.dat) 
                                            0.00000000 
                 Gallup.Gallup-1972 (df1972.7:861.dat) 
                                            0.00000000 
                   Gallup.Gallup-1972 (df1972:846.dat) 
                                            0.00000000 
                   Gallup.Gallup-1976 (df1976:949.dat) 
                                            0.00000000 
                   Gallup.Gallup-1978 (df1978:995.dat) 
                                            0.00000000 
                  Gallup.Gallup-1981 (df1981:1168.dat) 
                                            0.00000000 
                Gallup.Gallup-1982 (df1982.5:1189.dat) 
                                            0.00000000 
              Gallup.Gallup-1982 (df1982:aipo1202.dat) 
                                            0.00000000 
              Gallup.Gallup-1985 (df1985.5:g85064.por) 
                                            0.00000000 
                  Gallup.Gallup-1985 (df1985:1248.dat) 
                                            0.00000000 
                  Gallup.Gallup-1986 (df1986:1261.dat) 
                                            0.00000000 
                 Gallup.Gallup-1988 (df1988:Ai875.dat) 
                                            0.00000000 
               Gallup.Gallup-1989 (df1989.5:89135.dat) 
                                            0.00000000 
              Gallup.Gallup-1989 (df1989:Oct89nii.dat) 
                                            0.00000000 
               Gallup.Gallup-1990 (df1990:g922017.DAT) 
                                            0.00000000 
             Gallup.Gallup-1991 (df1991.5:G122021.dat) 
                                            0.00000000 
             Gallup.Gallup-1991 (df1991.7:g222006.por) 
                                            0.00000000 
             Gallup.Gallup-1992 (df1992.5:g205115.por) 
                                            0.00000000 
             Gallup.Gallup-1992 (df1992.7:g322014.por) 
                                            0.00000000 
              Gallup.Gallup-1992 (df1992:life1992.dat) 
                                            0.00000000 
             Gallup.Gallup-1993 (df1993.5:A322052.DAT) 
                                            0.00000000 
              Gallup.Gallup-1993 (df1993:g422017B.DAT) 
                                            0.00000000 
            Gallup.Gallup-1994 (df1994.5:A0807010.DAT) 
                                            0.00000000 
            Gallup.Gallup-1994 (df1994.9:A0807018.DAT) 
                                            0.00000000 
            Gallup.Gallup-1994 (df1994.95:A422039.DAT) 
                                            0.00000000 
              Gallup.Gallup-1994 (df1994:A0807020.DAT) 
                                            0.00000000 
               Gallup.Gallup-1995 (df1995.5:A018A.DAT) 
                                            0.00000000 
             Gallup.Gallup-1995 (df1995.7:9509012.DAT) 
                                            0.00000000 
               Gallup.Gallup-1995 (df1995:5001020.DAT) 
                                            0.00000000 
              Gallup.Gallup-1996 (df1996:A9605012.DAT) 
                                            0.00000000 
            Gallup.Gallup-1997 (df1997.5:g9707017.DAT) 
                                            0.00000000 
               Gallup.Gallup-1997 (df1997:9708018.dat) 
                                            0.00000000 
            Gallup.Gallup-1998 (df1998.5:A9810040.DAT) 
                                            0.00000000 
              Gallup.Gallup-1998 (df1998:A9806017.DAT) 
                                            0.00000000 
             Gallup.Gallup-1999 (df1999.5:9903015.dat) 
                                            0.00000000 
            Gallup.Gallup-1999 (df1999.7:A9906032.dat) 
                                            0.00000000 
               Gallup.Gallup-1999 (df1999:9902009.dat) 
                                            0.00000000 
             Gallup.Gallup-2000 (df2000.5:g200008.dat) 
                                            0.00000000 
               Gallup.Gallup-2008 (df2008:g200824.por) 
                                            0.00000000 
           Gallup.Gallup-2009 (df2009.5:usa200921.por) 
                                            0.00000000 
               Gallup.Gallup-2009 (df2009:g200911.por) 
                                            0.00000000 
           Gallup.Gallup-2010 (df2010.5:usa201021.por) 
                                            0.00000000 
               Gallup.Gallup-2010 (df2010:g201009.por) 
                                            0.00000000 
             Gallup.Gallup-2011 (df2011.5:g201121.por) 
                                            0.00000000 
               Gallup.Gallup-2011 (df2011:g200111.por) 
                                            0.00000000 
             Gallup.Gallup-2012 (df2012.5:g201208.por) 
                                            0.00000000 
           Gallup.Gallup-2012 (df2012.7:usa201219.por) 
                                            0.00000000 
             Gallup.Gallup-2013 (df2013.5:g201321.dat) 
                                            0.00000000 
           Gallup.Gallup-2013 (df2013.7:usa201222.dat) 
                                            0.00000000 
               Gallup.Gallup-2013 (df2013:g201307.por) 
                                            0.00000000 
             Gallup.Gallup-2014 (df2014.5:g201407.por) 
                                            0.00000000 
                                          GSS.GSS-1972 
                                            0.00000000 
                                          GSS.GSS-1973 
                                            0.00000000 
                                          GSS.GSS-1974 
                                            0.00000000 
                                          GSS.GSS-1975 
                                            0.00000000 
                                          GSS.GSS-1976 
                                            0.00000000 
                                          GSS.GSS-1977 
                                            0.00000000 
                                          GSS.GSS-1978 
                                            0.00000000 
                                          GSS.GSS-1980 
                                            0.00000000 
                                          GSS.GSS-1982 
                                            0.00000000 
                                          GSS.GSS-1983 
                                            0.00000000 
                                          GSS.GSS-1984 
                                            0.00000000 
                                          GSS.GSS-1985 
                                            0.00000000 
                                          GSS.GSS-1986 
                                            0.00000000 
                                          GSS.GSS-1987 
                                            0.00000000 
                                          GSS.GSS-1988 
                                            0.00000000 
                                          GSS.GSS-1989 
                                            0.00000000 
                                          GSS.GSS-1990 
                                            0.00000000 
                                          GSS.GSS-1991 
                                            0.00000000 
                                          GSS.GSS-1993 
                                            0.00000000 
                                          GSS.GSS-1994 
                                            0.00000000 
                                          GSS.GSS-1996 
                                            0.00000000 
                                          GSS.GSS-1998 
                                            0.00000000 
                                          GSS.GSS-2000 
                                            0.00000000 
                                          GSS.GSS-2002 
                                            0.00000000 
                                          GSS.GSS-2004 
                                            0.00000000 
                                          GSS.GSS-2006 
                                            0.00000000 
                                          GSS.GSS-2008 
                                            0.00000000 
                                          GSS.GSS-2010 
                                            0.00000000 
                                          GSS.GSS-2012 
                                            0.00000000 
                                          GSS.GSS-2014 
                                            0.00000000 
                   NBCLAT.LAT-1993 (df1993:Lat322.dat) 
                                            0.00000000 
                   NBCLAT.LAT-1994 (df1994:Lat328.dat) 
                                            0.00000000 
                 NBCLAT.LAT-1995 (df1995.5:LAT369.DAT) 
                                            0.00000000 
                 NBCLAT.NBC-1985 (df1985:n1985may.por) 
                                            0.00000000 
                 NBCLAT.NBC-1995 (df1995:Nbcw4063.dat) 
                                            0.00000000 
                Roper.Roper-1971 (df1971:rcom0524.dat) 
                                            0.00000000 
                Roper.Roper-1973 (df1973:rprr7309.dat) 
                                            0.00000000 
              Roper.Roper-1974 (df1974.5:rprr7401.dat) 
                                            0.00000000 
                Roper.Roper-1974 (df1974:rprr7409.dat) 
                                            0.00000000 
              Roper.Roper-1975 (df1975.5:rprr7501.dat) 
                                            0.00000000 
                Roper.Roper-1975 (df1975:rprr7509.dat) 
                                            0.00000000 
                Roper.Roper-1976 (df1976.5:rr7601.por) 
                                            0.00000000 
                Roper.Roper-1976 (df1976:rprr7609.dat) 
                                            0.00000000 
                Roper.Roper-1977 (df1977:ROPER771.DAT) 
                                            0.00000000 
                Roper.Roper-1978 (df1978.5:RR7801.DAT) 
                                            0.00000000 
               Roper.Roper-1978 (df1978:ROPER7802.dat) 
                                            0.00000000 
              Roper.Roper-1979 (df1979.5:Roper791.dat) 
                                            0.00000000 
                Roper.Roper-1979 (df1979:rprr7909.dat) 
                                            0.00000000 
                Roper.Roper-1980 (df1980:rprr8001.dat) 
                                            0.00000000 
                Roper.Roper-1981 (df1981:rprr8102.dat) 
                                            0.00000000 
                Roper.Roper-1982 (df1982:rprr8209.dat) 
                                            0.00000000 
              Roper.Roper-1983 (df1983.5:rprr8301.dat) 
                                            0.00000000 
                Roper.Roper-1983 (df1983:rprr8309.dat) 
                                            0.00000000 
              Roper.Roper-1984 (df1984.5:rprr8401.por) 
                                            0.00000000 
                Roper.Roper-1984 (df1984:rprr8402.dat) 
                                            0.00000000 
                  Roper.Roper-1985 (df1985:rr8501.por) 
                                            0.00000000 
                  Roper.Roper-1986 (df1986:rr8601.dat) 
                                            0.00000000 
                  Roper.Roper-1987 (df1987:rr8701.DAT) 
                                            0.00000000 
               Time.Time-1989 (df1989.5:y19895406.dat) 
                                            0.00000000 
                    Time.Time-1993 (df1993:y85212.dat) 
                                            0.00000000 
                    Time.Time-1994 (df1994:y94012.por) 
                                            0.00000000 
                  Time.Time-1995 (df1995.5:y95015.por) 
                                            0.00000000 
                    Time.Time-1995 (df1995:y95002.por) 
                                            0.00000000 
                    Time.Time-1997 (df1997:y97006.por) 
                                            0.00000000 
                   Time.Time-2001 (df2001:y200105.dat) 
                                            0.00000000 
                 Time.Time-2003 (df2003:time01_03.dat) 
                                            0.00000000 
            Gallup.Gallup-1967 (df1967.5:aipo0749.dat) 
                                            0.02836075 
              Roper.Roper-1981 (df1981.5:rprr8101.dat) 
                                            0.05175983 
              Gallup.Gallup-1957 (df1957:aipo0588.dat) 
                                            0.06544503 
                   Gallup.Gallup-1960 (df1960:625.dat) 
                                            0.06700168 
                Roper.Roper-1982 (df1982.5:rr8201.dat) 
                                            0.10060362 
         Gallup.Gallup-2003 (df2003.5:2003_06_09x.por) 
                                            0.29154519 
             Gallup.Gallup-2000 (df2000.9:g200025.dat) 
                                            0.29382958 
               Gallup.Gallup-2006 (df2006:g200618.dat) 
                                            0.29615005 
             Gallup.Gallup-2001 (df2001.5:a200121.dat) 
                                            0.29673591 
               Gallup.Gallup-2005 (df2005:g200524.por) 
                                            0.29880478 
           Gallup.Gallup-2003 (df2003:2003_05_19x.dat) 
                                            0.39447732 
               Gallup.Gallup-2004 (df2004:g200418.por) 
                                            0.39920160 
                                        ANES.ANES-1948 
                                            0.45317221 
               Gallup.Gallup-2000 (df2000:a200026.dat) 
                                            0.49019608 
              Gallup.Gallup-2001 (df2001:g2001-06.por) 
                                            0.49212598 
             Gallup.Gallup-2006 (df2006.5:g200621.por) 
                                            0.49900200 
               Gallup.Gallup-1991 (df1991:g222002.dat) 
                                            0.50505051 
             Gallup.Gallup-2000 (df2000.7:a200010.dat) 
                                            0.68226121 
           Gallup.Gallup-2002 (df2002:2002_06_21x.por) 
                                            0.68627451 
               Gallup.Gallup-2007 (df2007:a200721.por) 
                                            0.89374379 
               Gallup.Gallup-2014 (df2014:g201411.por) 
                                            0.95846645 
                                        ANES.ANES-1952 
                                            5.26592944 
                 Time.Time-1989 (df1989:y19985415.por) 
                                           15.87301587 

> edNAs<-lapply(allpolls,function(poll) {
+   #poll<-abcdfs
+   tapply(poll$ed_f,poll$id,function(x) {
+     100*sum(is.na(x))/length(x)
+   })
+ }) %>% unlist %>% sort

> edNAs
                                        ANES.ANES-2004 
                                            0.00000000 
                     CBS.CBS-1994 (df1994:CJUN94A.DAT) 
                                            0.00000000 
                   Gallup.Gallup-1956 (df1956:562.dat) 
                                            0.00000000 
                                          GSS.GSS-1985 
                                            0.00000000 
                                          GSS.GSS-2014 
                                            0.03940110 
                Roper.Roper-1982 (df1982:rprr8209.dat) 
                                            0.05000000 
                                          GSS.GSS-1986 
                                            0.06802721 
                                          GSS.GSS-2004 
                                            0.07112376 
            Gallup.Gallup-1967 (df1967.5:aipo0749.dat) 
                                            0.08508225 
                       ABC.ABC-2005 (df2005:aw980.por) 
                                            0.09242144 
                Roper.Roper-1977 (df1977:ROPER771.DAT) 
                                            0.10000000 
              Roper.Roper-1983 (df1983.5:rprr8301.dat) 
                                            0.10000000 
                                          GSS.GSS-2012 
                                            0.10131712 
                                        ANES.ANES-1972 
                                            0.11090573 
                                          GSS.GSS-1983 
                                            0.12507817 
              Gallup.Gallup-1982 (df1982:aipo1202.dat) 
                                            0.13458950 
                                          GSS.GSS-1990 
                                            0.14577259 
                Roper.Roper-1983 (df1983:rprr8309.dat) 
                                            0.15000000 
                  Roper.Roper-1985 (df1985:rr8501.por) 
                                            0.15182186 
                                        ANES.ANES-1960 
                                            0.16934801 
            Gallup.Gallup-1965 (df1965.7:AIPO0716.dat) 
                                            0.17021277 
                   Gallup.Gallup-1965 (df1965:704.dat) 
                                            0.17182131 
                                        ANES.ANES-1996 
                                            0.17502917 
                     ABC.ABC-1981 (df1981:AWP8846.DAT) 
                                            0.19569472 
              Gallup.Gallup-1994 (df1994:A0807020.DAT) 
                                            0.19569472 
                   Gallup.Gallup-1972 (df1972:846.dat) 
                                            0.19828156 
                Gallup.Gallup-1982 (df1982.5:1189.dat) 
                                            0.19854401 
            Gallup.Gallup-1997 (df1997.5:g9707017.DAT) 
                                            0.19920319 
             Gallup.Gallup-1991 (df1991.5:G122021.dat) 
                                            0.19940179 
             Gallup.Gallup-1993 (df1993.5:A322052.DAT) 
                                            0.19940179 
                                          GSS.GSS-1975 
                                            0.20134228 
                                          GSS.GSS-1974 
                                            0.20215633 
                                          GSS.GSS-1988 
                                            0.20256583 
                                          GSS.GSS-1984 
                                            0.20366599 
                 Gallup.Gallup-1969 (df1969.5:773.dat) 
                                            0.20533881 
              Gallup.Gallup-1967 (df1967:aipo0746.dat) 
                                            0.20691694 
                                          GSS.GSS-1994 
                                            0.23395722 
              Gallup.Gallup-1993 (df1993:g422017B.DAT) 
                                            0.24115756 
                                          GSS.GSS-2006 
                                            0.24390244 
                                          GSS.GSS-2010 
                                            0.24461840 
                                          GSS.GSS-2008 
                                            0.24715769 
                                        ANES.ANES-1980 
                                            0.24783147 
                  Gallup.Gallup-1981 (df1981:1168.dat) 
                                            0.24860162 
                                          GSS.GSS-1993 
                                            0.24906600 
              Roper.Roper-1984 (df1984.5:rprr8401.por) 
                                            0.25000000 
                   Gallup.Gallup-1976 (df1976:949.dat) 
                                            0.25974026 
            Gallup.Gallup-1994 (df1994.95:A422039.DAT) 
                                            0.28957529 
         Gallup.Gallup-2003 (df2003.5:2003_06_09x.por) 
                                            0.29154519 
             Gallup.Gallup-1999 (df1999.5:9903015.dat) 
                                            0.29382958 
             Gallup.Gallup-2000 (df2000.9:g200025.dat) 
                                            0.29382958 
               Gallup.Gallup-2011 (df2011:g200111.por) 
                                            0.29411765 
               Gallup.Gallup-2006 (df2006:g200618.dat) 
                                            0.29615005 
               Time.Time-1989 (df1989.5:y19895406.dat) 
                                            0.29644269 
            Gallup.Gallup-1994 (df1994.9:A0807018.DAT) 
                                            0.29673591 
             Gallup.Gallup-2001 (df2001.5:a200121.dat) 
                                            0.29673591 
               Gallup.Gallup-2005 (df2005:g200524.por) 
                                            0.29880478 
              Gallup.Gallup-1998 (df1998:A9806017.DAT) 
                                            0.29910269 
                Roper.Roper-1984 (df1984:rprr8402.dat) 
                                            0.30000000 
                  Roper.Roper-1987 (df1987:rr8701.DAT) 
                                            0.30045068 
                Roper.Roper-1981 (df1981:rprr8102.dat) 
                                            0.30395137 
                                          GSS.GSS-1996 
                                            0.30991736 
                                          GSS.GSS-1972 
                                            0.30998140 
                Roper.Roper-1973 (df1973:rprr7309.dat) 
                                            0.31670625 
                                          GSS.GSS-2000 
                                            0.31948882 
                   Gallup.Gallup-1978 (df1978:995.dat) 
                                            0.32051282 
                                        ANES.ANES-1968 
                                            0.32113038 
                  Gallup.Gallup-1985 (df1985:1248.dat) 
                                            0.32851511 
                 Gallup.Gallup-1972 (df1972.7:861.dat) 
                                            0.33244681 
                                          GSS.GSS-1973 
                                            0.33244681 
                   Gallup.Gallup-1960 (df1960:625.dat) 
                                            0.33500838 
                                          GSS.GSS-1980 
                                            0.34059946 
              Gallup.Gallup-1966 (df1966:aipo0729.dat) 
                                            0.34100597 
                                        ANES.ANES-1954 
                                            0.35118525 
                                        ANES.ANES-1982 
                                            0.35260931 
                Roper.Roper-1979 (df1979:rprr7909.dat) 
                                            0.35971223 
                Roper.Roper-1980 (df1980:rprr8001.dat) 
                                            0.36382536 
               Gallup.Gallup-1997 (df1997:9708018.dat) 
                                            0.36630037 
                                        ANES.ANES-2000 
                                            0.38738240 
                                        ANES.ANES-1998 
                                            0.39032006 
                                          GSS.GSS-1978 
                                            0.39164491 
           Gallup.Gallup-2003 (df2003:2003_05_19x.dat) 
                                            0.39447732 
               Gallup.Gallup-2009 (df2009:g200911.por) 
                                            0.39564787 
                   NBCLAT.LAT-1994 (df1994:Lat328.dat) 
                                            0.39577836 
               Gallup.Gallup-2004 (df2004:g200418.por) 
                                            0.39920160 
              Gallup.Gallup-1968 (df1968:aipo0757.dat) 
                                            0.40000000 
               Gallup.Gallup-1995 (df1995:5001020.DAT) 
                                            0.40000000 
                                          GSS.GSS-1976 
                                            0.40026684 
                Roper.Roper-1974 (df1974:rprr7409.dat) 
                                            0.40040040 
                Roper.Roper-1982 (df1982.5:rr8201.dat) 
                                            0.40241449 
              Gallup.Gallup-1992 (df1992:life1992.dat) 
                                            0.40983607 
                                          GSS.GSS-1998 
                                            0.42372881 
                                          GSS.GSS-1982 
                                            0.43010753 
             Gallup.Gallup-1992 (df1992.7:g322014.por) 
                                            0.43258832 
                                          GSS.GSS-2002 
                                            0.43399638 
                     ABC.ABC-2003 (df2003:abcw909.por) 
                                            0.44130627 
                                        ANES.ANES-1956 
                                            0.45402951 
                  Roper.Roper-1986 (df1986:rr8601.dat) 
                                            0.45454545 
                                          GSS.GSS-1989 
                                            0.45543266 
                                          GSS.GSS-1991 
                                            0.46143705 
                                        ANES.ANES-1962 
                                            0.46260601 
                                        ANES.ANES-1970 
                                            0.46449900 
                                        ANES.ANES-1966 
                                            0.46475600 
               Gallup.Gallup-2008 (df2008:g200824.por) 
                                            0.48661800 
                     ABC.ABC-1982 (df1982:ABC7871.DAT) 
                                            0.48701299 
                                        ANES.ANES-1976 
                                            0.48932384 
               Gallup.Gallup-2000 (df2000:a200026.dat) 
                                            0.49019608 
              Gallup.Gallup-1996 (df1996:A9605012.DAT) 
                                            0.49067713 
              Gallup.Gallup-2001 (df2001:g2001-06.por) 
                                            0.49212598 
                   Time.Time-2001 (df2001:y200105.dat) 
                                            0.49261084 
            Gallup.Gallup-1998 (df1998.5:A9810040.DAT) 
                                            0.49358342 
             Gallup.Gallup-2011 (df2011.5:g201121.por) 
                                            0.49407115 
             Gallup.Gallup-2006 (df2006.5:g200621.por) 
                                            0.49900200 
               Roper.Roper-1978 (df1978:ROPER7802.dat) 
                                            0.49950050 
              Gallup.Gallup-1955 (df1955:aipo0550.dat) 
                                            0.50107373 
               Gallup.Gallup-1991 (df1991:g222002.dat) 
                                            0.50505051 
                  Gallup.Gallup-1986 (df1986:1261.dat) 
                                            0.50987890 
                   Gallup.Gallup-1971 (df1971:839.dat) 
                                            0.51347882 
                                        ANES.ANES-1978 
                                            0.52083333 
Gallup.Gallup-1969 (df1969:aipo0774.dat,aipo0774S.dat) 
                                            0.53226880 
                   NBCLAT.LAT-1993 (df1993:Lat322.dat) 
                                            0.53655265 
                Roper.Roper-1975 (df1975:rprr7509.dat) 
                                            0.54808171 
                                          GSS.GSS-1987 
                                            0.54975261 
                 Gallup.Gallup-1965 (df1965.5:709.dat) 
                                            0.56625142 
                                        ANES.ANES-1964 
                                            0.57288351 
             Gallup.Gallup-2000 (df2000.7:a200010.dat) 
                                            0.58479532 
                      ABC.ABC-1995 (df1995:aw5514.dat) 
                                            0.58593750 
             Gallup.Gallup-1995 (df1995.7:9509012.DAT) 
                                            0.59347181 
              Gallup.Gallup-1985 (df1985.5:g85064.por) 
                                            0.59464817 
               Gallup.Gallup-1995 (df1995.5:A018A.DAT) 
                                            0.59523810 
                Roper.Roper-1978 (df1978.5:RR7801.DAT) 
                                            0.59970015 
                                        ANES.ANES-2008 
                                            0.60292851 
                                        ANES.ANES-1984 
                                            0.62029242 
                    CBS.CBS-2012 (df2012:c201207a.por) 
                                            0.64279155 
                Roper.Roper-1976 (df1976:rprr7609.dat) 
                                            0.64935065 
                                          GSS.GSS-1977 
                                            0.65359477 
               Gallup.Gallup-1999 (df1999:9902009.dat) 
                                            0.66413662 
             Gallup.Gallup-2000 (df2000.5:g200008.dat) 
                                            0.66985646 
                 NBCLAT.NBC-1995 (df1995:Nbcw4063.dat) 
                                            0.68259386 
                 Gallup.Gallup-1972 (df1972.5:860.dat) 
                                            0.68446270 
           Gallup.Gallup-2002 (df2002:2002_06_21x.por) 
                                            0.68627451 
             Gallup.Gallup-2012 (df2012.5:g201208.por) 
                                            0.69721116 
              Gallup.Gallup-1957 (df1957:aipo0588.dat) 
                                            0.71989529 
                   ABC.ABC-1982 (df1982.5:Awp7598.dat) 
                                            0.72944297 
                 NBCLAT.LAT-1995 (df1995.5:LAT369.DAT) 
                                            0.77138850 
              Roper.Roper-1981 (df1981.5:rprr8101.dat) 
                                            0.77639752 
            Gallup.Gallup-1999 (df1999.7:A9906032.dat) 
                                            0.78740157 
                     ABC.ABC-2005 (df2005.5:aw985.por) 
                                            0.79681275 
              Roper.Roper-1975 (df1975.5:rprr7501.dat) 
                                            0.79800499 
                 Gallup.Gallup-1988 (df1988:Ai875.dat) 
                                            0.79920080 
               Gallup.Gallup-1989 (df1989.5:89135.dat) 
                                            0.80971660 
             Gallup.Gallup-1992 (df1992.5:g205115.por) 
                                            0.82236842 
              Roper.Roper-1974 (df1974.5:rprr7401.dat) 
                                            0.84745763 
               Gallup.Gallup-2013 (df2013:g201307.por) 
                                            0.85022891 
                                        ANES.ANES-2002 
                                            0.86035738 
                    Time.Time-1994 (df1994:y94012.por) 
                                            0.86633663 
               Gallup.Gallup-1990 (df1990:g922017.DAT) 
                                            0.87378641 
           Gallup.Gallup-2009 (df2009.5:usa200921.por) 
                                            0.88495575 
              Gallup.Gallup-1989 (df1989:Oct89nii.dat) 
                                            0.89141005 
               Gallup.Gallup-2007 (df2007:a200721.por) 
                                            0.89374379 
                        ABC.ABC-2006 (df2006:1015.por) 
                                            0.90000000 
                    Time.Time-1995 (df1995:y95002.por) 
                                            0.90000000 
            Gallup.Gallup-1994 (df1994.5:A0807010.DAT) 
                                            0.91463415 
                Roper.Roper-1976 (df1976.5:rr7601.por) 
                                            0.94952524 
           Gallup.Gallup-2012 (df2012.7:usa201219.por) 
                                            0.98522167 
                    Time.Time-1993 (df1993:y85212.dat) 
                                            1.00000000 
                 NBCLAT.NBC-1985 (df1985:n1985may.por) 
                                            1.00187852 
           Gallup.Gallup-2013 (df2013.7:usa201222.dat) 
                                            1.05973025 
             Gallup.Gallup-1991 (df1991.7:g222006.por) 
                                            1.06820049 
                                        ANES.ANES-1986 
                                            1.10294118 
            Gallup.Gallup-1969 (df1969.7:aipo6955.dat) 
                                            1.12589560 
                                        ANES.ANES-2012 
                                            1.14981400 
                                        ANES.ANES-1958 
                                            1.17241379 
                                        ANES.ANES-1974 
                                            1.20634921 
           Gallup.Gallup-2010 (df2010.5:usa201021.por) 
                                            1.25361620 
               Gallup.Gallup-2010 (df2010:g201009.por) 
                                            1.27450980 
               Gallup.Gallup-2014 (df2014:g201411.por) 
                                            1.35782748 
             Gallup.Gallup-2014 (df2014.5:g201407.por) 
                                            1.36319377 
              Roper.Roper-1979 (df1979.5:Roper791.dat) 
                                            1.38960371 
                                        ANES.ANES-1990 
                                            1.66666667 
                Roper.Roper-1971 (df1971:rcom0524.dat) 
                                            1.66777852 
                                        ANES.ANES-1988 
                                            1.81372549 
                  Time.Time-1995 (df1995.5:y95015.por) 
                                            2.00000000 
                                        ANES.ANES-1994 
                                            2.11699164 
                    Time.Time-1997 (df1997:y97006.por) 
                                            2.14843750 
                                        ANES.ANES-1992 
                                            2.61569416 
                 Time.Time-2003 (df2003:time01_03.dat) 
                                            2.97029703 
                                        ANES.ANES-1952 
                                            5.73986309 
             Gallup.Gallup-2013 (df2013.5:g201321.dat) 
                                           13.38506305 
                 Time.Time-1989 (df1989:y19985415.por) 
                                           16.66666667 
                                        ANES.ANES-1948 
                                          100.00000000 

> #percent of respondents with a missing age, computed per individual poll
> #(poll$id) within every poll source, then flattened and sorted ascending
> ageNAs<-sort(unlist(lapply(allpolls,function(pollset) {
+   pct_missing<-function(ages) {
+     100*sum(is.na(ages))/length(ages)
+   }
+   tapply(pollset$age,pollset$id,pct_missing)
+ })))

> ageNAs
                                        ANES.ANES-1958 
                                            0.00000000 
                                        ANES.ANES-1990 
                                            0.00000000 
                                        ANES.ANES-1992 
                                            0.00000000 
                                        ANES.ANES-1994 
                                            0.00000000 
                                        ANES.ANES-2004 
                                            0.00000000 
                   Gallup.Gallup-1956 (df1956:562.dat) 
                                            0.00000000 
                                          GSS.GSS-1990 
                                            0.00000000 
                Roper.Roper-1971 (df1971:rcom0524.dat) 
                                            0.00000000 
                Roper.Roper-1973 (df1973:rprr7309.dat) 
                                            0.00000000 
              Roper.Roper-1974 (df1974.5:rprr7401.dat) 
                                            0.00000000 
                Roper.Roper-1974 (df1974:rprr7409.dat) 
                                            0.00000000 
              Roper.Roper-1975 (df1975.5:rprr7501.dat) 
                                            0.00000000 
                Roper.Roper-1975 (df1975:rprr7509.dat) 
                                            0.00000000 
                Roper.Roper-1976 (df1976.5:rr7601.por) 
                                            0.00000000 
                Roper.Roper-1976 (df1976:rprr7609.dat) 
                                            0.00000000 
                Roper.Roper-1977 (df1977:ROPER771.DAT) 
                                            0.00000000 
                Roper.Roper-1978 (df1978.5:RR7801.DAT) 
                                            0.00000000 
              Roper.Roper-1979 (df1979.5:Roper791.dat) 
                                            0.00000000 
                Roper.Roper-1980 (df1980:rprr8001.dat) 
                                            0.00000000 
                                        ANES.ANES-1960 
                                            0.08467401 
                                        ANES.ANES-1996 
                                            0.11668611 
                                        ANES.ANES-1980 
                                            0.12391574 
              Gallup.Gallup-1982 (df1982:aipo1202.dat) 
                                            0.13458950 
                                        ANES.ANES-1986 
                                            0.13786765 
                                        ANES.ANES-1982 
                                            0.14104372 
                                          GSS.GSS-1998 
                                            0.14124294 
                                          GSS.GSS-2010 
                                            0.14677104 
                                        ANES.ANES-1988 
                                            0.14705882 
               Time.Time-1989 (df1989.5:y19895406.dat) 
                                            0.19762846 
                                          GSS.GSS-1991 
                                            0.19775873 
                                          GSS.GSS-1994 
                                            0.20053476 
                                          GSS.GSS-1996 
                                            0.20661157 
                                          GSS.GSS-2012 
                                            0.25329281 
                Roper.Roper-1979 (df1979:rprr7909.dat) 
                                            0.25693731 
                                          GSS.GSS-1989 
                                            0.26024723 
                                          GSS.GSS-1973 
                                            0.26595745 
              Gallup.Gallup-1968 (df1968:aipo0757.dat) 
                                            0.26666667 
                                          GSS.GSS-1988 
                                            0.27008778 
                                          GSS.GSS-2000 
                                            0.28399006 
              Gallup.Gallup-1998 (df1998:A9806017.DAT) 
                                            0.29910269 
                                          GSS.GSS-1972 
                                            0.30998140 
                                          GSS.GSS-1993 
                                            0.31133250 
                                        ANES.ANES-1964 
                                            0.31826862 
                  Gallup.Gallup-1986 (df1986:1261.dat) 
                                            0.31867431 
                                          GSS.GSS-2004 
                                            0.32005690 
                   Gallup.Gallup-1978 (df1978:995.dat) 
                                            0.32051282 
                                        ANES.ANES-1968 
                                            0.32113038 
                  Gallup.Gallup-1985 (df1985:1248.dat) 
                                            0.32851511 
                                          GSS.GSS-1975 
                                            0.33557047 
                                          GSS.GSS-2014 
                                            0.35460993 
            Gallup.Gallup-1994 (df1994.95:A422039.DAT) 
                                            0.38610039 
                Gallup.Gallup-1982 (df1982.5:1189.dat) 
                                            0.39708802 
             Gallup.Gallup-1993 (df1993.5:A322052.DAT) 
                                            0.39880359 
                                          GSS.GSS-2006 
                                            0.39911308 
                                          GSS.GSS-1976 
                                            0.40026684 
                                          GSS.GSS-1974 
                                            0.40431267 
                                          GSS.GSS-1984 
                                            0.40733198 
                     CBS.CBS-1994 (df1994:CJUN94A.DAT) 
                                            0.40899796 
                                        ANES.ANES-1978 
                                            0.43402778 
                                          GSS.GSS-1983 
                                            0.43777361 
                                          GSS.GSS-1985 
                                            0.45632334 
                                          GSS.GSS-1978 
                                            0.45691906 
                                          GSS.GSS-1977 
                                            0.45751634 
                                          GSS.GSS-1986 
                                            0.47619048 
                 Gallup.Gallup-1969 (df1969.5:773.dat) 
                                            0.47912389 
              Gallup.Gallup-1994 (df1994:A0807020.DAT) 
                                            0.48923679 
                                          GSS.GSS-2008 
                                            0.49431537 
              Gallup.Gallup-1985 (df1985.5:g85064.por) 
                                            0.49554014 
                                        ANES.ANES-2000 
                                            0.49806309 
                                          GSS.GSS-2002 
                                            0.50632911 
                   Gallup.Gallup-1972 (df1972:846.dat) 
                                            0.52875083 
                                        ANES.ANES-1962 
                                            0.53970702 
             Gallup.Gallup-1999 (df1999.5:9903015.dat) 
                                            0.58765916 
             Gallup.Gallup-2000 (df2000.9:g200025.dat) 
                                            0.58765916 
               Gallup.Gallup-2000 (df2000:a200026.dat) 
                                            0.58823529 
              Gallup.Gallup-1996 (df1996:A9605012.DAT) 
                                            0.58881256 
               Gallup.Gallup-1995 (df1995.5:A018A.DAT) 
                                            0.59523810 
                                        ANES.ANES-1970 
                                            0.59721301 
                                          GSS.GSS-1980 
                                            0.61307902 
                                        ANES.ANES-1976 
                                            0.62277580 
                                        ANES.ANES-1972 
                                            0.62846580 
              Gallup.Gallup-1992 (df1992:life1992.dat) 
                                            0.65573770 
             Gallup.Gallup-1992 (df1992.5:g205115.por) 
                                            0.65789474 
                                          GSS.GSS-1987 
                                            0.65970313 
                   NBCLAT.LAT-1993 (df1993:Lat322.dat) 
                                            0.67069081 
                   Time.Time-2001 (df2001:y200105.dat) 
                                            0.68965517 
            Gallup.Gallup-1998 (df1998.5:A9810040.DAT) 
                                            0.69101678 
            Gallup.Gallup-1997 (df1997.5:g9707017.DAT) 
                                            0.69721116 
                                        ANES.ANES-2002 
                                            0.72799471 
               Gallup.Gallup-1997 (df1997:9708018.dat) 
                                            0.73260073 
                      ABC.ABC-1995 (df1995:aw5514.dat) 
                                            0.78125000 
               Gallup.Gallup-1995 (df1995:5001020.DAT) 
                                            0.80000000 
                    Time.Time-1993 (df1993:y85212.dat) 
                                            0.80000000 
                                          GSS.GSS-1982 
                                            0.80645161 
                                        ANES.ANES-1966 
                                            0.85205267 
             Gallup.Gallup-1992 (df1992.7:g322014.por) 
                                            0.86517664 
                  Gallup.Gallup-1981 (df1981:1168.dat) 
                                            0.87010566 
         Gallup.Gallup-2003 (df2003.5:2003_06_09x.por) 
                                            0.87463557 
               Gallup.Gallup-2011 (df2011:g200111.por) 
                                            0.88235294 
              Gallup.Gallup-1993 (df1993:g422017B.DAT) 
                                            0.88424437 
                                        ANES.ANES-1984 
                                            0.88613203 
             Gallup.Gallup-1995 (df1995.7:9509012.DAT) 
                                            0.89020772 
            Gallup.Gallup-1994 (df1994.5:A0807010.DAT) 
                                            0.91463415 
            Gallup.Gallup-1967 (df1967.5:aipo0749.dat) 
                                            0.93590471 
                     ABC.ABC-1982 (df1982:ABC7871.DAT) 
                                            0.97402597 
              Gallup.Gallup-1957 (df1957:aipo0588.dat) 
                                            0.98167539 
              Gallup.Gallup-2001 (df2001:g2001-06.por) 
                                            0.98425197 
               Gallup.Gallup-2006 (df2006:g200618.dat) 
                                            0.98716683 
             Gallup.Gallup-1991 (df1991.5:G122021.dat) 
                                            0.99700897 
                    Time.Time-1995 (df1995:y95002.por) 
                                            1.00000000 
                                        ANES.ANES-2012 
                                            1.01454177 
                 NBCLAT.NBC-1995 (df1995:Nbcw4063.dat) 
                                            1.09215017 
               Gallup.Gallup-2005 (df2005:g200524.por) 
                                            1.09561753 
                   Gallup.Gallup-1976 (df1976:949.dat) 
                                            1.10389610 
                     ABC.ABC-1981 (df1981:AWP8846.DAT) 
                                            1.10893673 
               Gallup.Gallup-1991 (df1991:g222002.dat) 
                                            1.11111111 
                 NBCLAT.NBC-1985 (df1985:n1985may.por) 
                                            1.12711334 
               Gallup.Gallup-1989 (df1989.5:89135.dat) 
                                            1.13360324 
                                        ANES.ANES-1956 
                                            1.13507378 
               Gallup.Gallup-1999 (df1999:9902009.dat) 
                                            1.13851992 
             Gallup.Gallup-1991 (df1991.7:g222006.por) 
                                            1.15036976 
               Gallup.Gallup-1990 (df1990:g922017.DAT) 
                                            1.16504854 
                   NBCLAT.LAT-1994 (df1994:Lat328.dat) 
                                            1.18733509 
             Gallup.Gallup-2006 (df2006.5:g200621.por) 
                                            1.19760479 
                                        ANES.ANES-1948 
                                            1.20845921 
               Gallup.Gallup-2008 (df2008:g200824.por) 
                                            1.21654501 
                                        ANES.ANES-1998 
                                            1.24902420 
Gallup.Gallup-1969 (df1969:aipo0774.dat,aipo0774S.dat) 
                                            1.26413839 
                                        ANES.ANES-1974 
                                            1.26984127 
                       ABC.ABC-2005 (df2005:aw980.por) 
                                            1.29390018 
              Gallup.Gallup-1989 (df1989:Oct89nii.dat) 
                                            1.29659643 
                 Gallup.Gallup-1988 (df1988:Ai875.dat) 
                                            1.29870130 
                   Gallup.Gallup-1960 (df1960:625.dat) 
                                            1.30653266 
               Gallup.Gallup-2013 (df2013:g201307.por) 
                                            1.30804447 
             Gallup.Gallup-2000 (df2000.5:g200008.dat) 
                                            1.33971292 
            Gallup.Gallup-1999 (df1999.7:A9906032.dat) 
                                            1.37795276 
             Gallup.Gallup-2011 (df2011.5:g201121.por) 
                                            1.38339921 
            Gallup.Gallup-1994 (df1994.9:A0807018.DAT) 
                                            1.38476756 
             Gallup.Gallup-2001 (df2001.5:a200121.dat) 
                                            1.38476756 
               Gallup.Gallup-2004 (df2004:g200418.por) 
                                            1.39720559 
                     ABC.ABC-2003 (df2003:abcw909.por) 
                                            1.41218005 
           Gallup.Gallup-2013 (df2013.7:usa201222.dat) 
                                            1.44508671 
            Gallup.Gallup-1965 (df1965.7:AIPO0716.dat) 
                                            1.44680851 
           Gallup.Gallup-2002 (df2002:2002_06_21x.por) 
                                            1.47058824 
               Gallup.Gallup-2009 (df2009:g200911.por) 
                                            1.48367953 
                 Gallup.Gallup-1972 (df1972.5:860.dat) 
                                            1.50581793 
                   ABC.ABC-1982 (df1982.5:Awp7598.dat) 
                                            1.52519894 
              Gallup.Gallup-1966 (df1966:aipo0729.dat) 
                                            1.64819551 
              Gallup.Gallup-1967 (df1967:aipo0746.dat) 
                                            1.65533550 
                 Time.Time-2003 (df2003:time01_03.dat) 
                                            1.71017102 
             Gallup.Gallup-2014 (df2014.5:g201407.por) 
                                            1.75267770 
             Gallup.Gallup-2000 (df2000.7:a200010.dat) 
                                            1.75438596 
           Gallup.Gallup-2012 (df2012.7:usa201219.por) 
                                            1.77339901 
           Gallup.Gallup-2003 (df2003:2003_05_19x.dat) 
                                            1.77514793 
             Gallup.Gallup-2012 (df2012.5:g201208.por) 
                                            1.79282869 
                 NBCLAT.LAT-1995 (df1995.5:LAT369.DAT) 
                                            1.82328191 
               Gallup.Gallup-2014 (df2014:g201411.por) 
                                            1.83706070 
              Gallup.Gallup-1955 (df1955:aipo0550.dat) 
                                            1.86113099 
               Gallup.Gallup-2007 (df2007:a200721.por) 
                                            1.88679245 
                                        ANES.ANES-2008 
                                            1.93798450 
             Gallup.Gallup-2013 (df2013.5:g201321.dat) 
                                            1.93986421 
                   Gallup.Gallup-1965 (df1965:704.dat) 
                                            2.00458190 
           Gallup.Gallup-2010 (df2010.5:usa201021.por) 
                                            2.12150434 
                  Time.Time-1995 (df1995.5:y95015.por) 
                                            2.12500000 
               Gallup.Gallup-2010 (df2010:g201009.por) 
                                            2.15686275 
                     ABC.ABC-2005 (df2005.5:aw985.por) 
                                            2.19123506 
           Gallup.Gallup-2009 (df2009.5:usa200921.por) 
                                            2.26155359 
                        ABC.ABC-2006 (df2006:1015.por) 
                                            2.30000000 
                    Time.Time-1997 (df1997:y97006.por) 
                                            2.44140625 
                   Gallup.Gallup-1971 (df1971:839.dat) 
                                            2.63157895 
            Gallup.Gallup-1969 (df1969.7:aipo6955.dat) 
                                            3.07062436 
                 Gallup.Gallup-1965 (df1965.5:709.dat) 
                                            4.33182333 
                    CBS.CBS-2012 (df2012:c201207a.por) 
                                            4.40771350 
                                        ANES.ANES-1952 
                                            6.63507109 
                  Roper.Roper-1986 (df1986:rr8601.dat) 
                                            6.71717172 
                  Roper.Roper-1987 (df1987:rr8701.DAT) 
                                            6.86029044 
                Roper.Roper-1982 (df1982.5:rr8201.dat) 
                                            7.24346076 
              Roper.Roper-1981 (df1981.5:rprr8101.dat) 
                                            7.40165631 
                Roper.Roper-1983 (df1983:rprr8309.dat) 
                                            7.55000000 
              Roper.Roper-1984 (df1984.5:rprr8401.por) 
                                            7.65000000 
                Roper.Roper-1982 (df1982:rprr8209.dat) 
                                            8.00000000 
                  Roper.Roper-1985 (df1985:rr8501.por) 
                                            8.04655870 
                Roper.Roper-1984 (df1984:rprr8402.dat) 
                                            8.05000000 
              Roper.Roper-1983 (df1983.5:rprr8301.dat) 
                                            8.15000000 
                Roper.Roper-1981 (df1981:rprr8102.dat) 
                                            8.35866261 
                 Gallup.Gallup-1972 (df1972.7:861.dat) 
                                            8.64361702 
               Roper.Roper-1978 (df1978:ROPER7802.dat) 
                                            8.99100899 
                    Time.Time-1994 (df1994:y94012.por) 
                                           13.86138614 
                 Time.Time-1989 (df1989:y19985415.por) 
                                           16.66666667 
                                        ANES.ANES-1954 
                                          100.00000000 

> ############################################################# 
> ############################################################# 
> 
> #does each polldf have the same set of IV's?
> ivs<-lapply(allpolls,function(poll) {
+   #keep every column name that does NOT end in one of the suffixes
+   #_n, _p, _np, _a, _na, _m, _nm (each optionally followed by "t");
+   #presumably the suffixed columns are the outcome/question variables
+   #-- TODO confirm
+   colnames_all<-names(poll)
+   has_suffix<-str_detect(colnames_all,"\\_(n|p|np|a|na|m|nm)(t)?$")
+   colnames_all[!has_suffix]
+ }); names(ivs)<-names(allpolls)

> #take a look at those ivs which are not in all df's:
> #count, for every iv name, how many poll data frames list it
> #(most frequent first)
> tmp<-sort(table(unlist(ivs)),
+           decreasing=TRUE)

> #these vars don't appear in all df's: their count is below the number
> #of poll data frames. Compare against length(allpolls) instead of a
> #hard-coded 8 so the check stays right if a poll source is added/dropped.
> probs<-names(tmp[tmp<length(allpolls)])

> #which dfs do they not appear in?
> #(one character vector of poll names per problem iv, named by iv below)
> ivs.missing<-lapply(probs,function(prob) {
+   #prob<-"female"
+   #vapply pins the result to exactly one logical per poll, so `!present`
+   #stays valid even when allpolls is empty (sapply would return a list)
+   present<-vapply(allpolls,function(df) {
+     prob%in%names(df)
+   },logical(1))
+   names(allpolls)[!present]
+ })

> names(ivs.missing)<-probs

> #story is that we don't have state info for GSS and NBCLAT
> #(and also for a subset of the data in other polls)
> 
> ############################################################# 
> ############################################################# 
> 
> #now, check the distribution and type of each IV
> #for each, I'll have different concerns
> #"birthyear","black","age","female","ed_f"
> 
> iv<-"age"

> #per poll source: how many individual polls (by id) hold `iv` in a
> #non-numeric column
> lapply(allpolls,function(poll) {
+   #poll<-abcdfs  
+   tapply(poll[[iv]],poll$id,function(x) {
+     #is.numeric() covers integer and double and always yields a single
+     #logical; comparing class(x) to strings yields one value per class
+     #and would break the sum for vectors with more than one class
+     !is.numeric(x)
+   }) %>% sum
+ }) #always integer or numeric
$ABC
[1] 0

$ANES
[1] 0

$CBS
[1] 0

$Gallup
[1] 0

$GSS
[1] 0

$NBCLAT
[1] 0

$Roper
[1] 0

$Time
[1] 0


> lapply(allpolls,function(poll) {
+   #tabulate every non-missing value of `iv` that is below 18
+   vals<-poll[[iv]]
+   under18<-!is.na(vals) & vals<18
+   vals[under18] %>% table
+ }) #just in the ANES
$ABC
< table of extent 0 >

$ANES
.
17 
21 

$CBS
< table of extent 0 >

$Gallup
< table of extent 0 >

$GSS
< table of extent 0 >

$NBCLAT
< table of extent 0 >

$Roper
< table of extent 0 >

$Time
< table of extent 0 >


> iv<-"birthyear"

> #per poll source: how many individual polls (by id) hold `iv` in a
> #non-numeric column
> lapply(allpolls,function(poll) {
+   #poll<-abcdfs  
+   tapply(poll[[iv]],poll$id,function(x) {
+     #is.numeric() covers integer and double and always yields a single
+     #logical; comparing class(x) to strings yields one value per class
+     #and would break the sum for vectors with more than one class
+     !is.numeric(x)
+   }) %>% sum
+ }) #always integer or numeric
$ABC
[1] 0

$ANES
[1] 0

$CBS
[1] 0

$Gallup
[1] 0

$GSS
[1] 0

$NBCLAT
[1] 0

$Roper
[1] 0

$Time
[1] 0


> #min and max of `iv` per poll source, ignoring missing values
> #NOTE(review): Roper's maximum prints as 1967.5 -- a fractional birth
> #year; presumably derived from a half-year survey date -- confirm
> lapply(allpolls,function(poll) {
+   #spell out TRUE: `T` is an ordinary variable that can be reassigned
+   range(poll[[iv]],na.rm=TRUE)
+ }) #believable range
$ABC
[1] 1886 1988

$ANES
[1] 1853 1995

$CBS
[1] 1899 1994

$Gallup
[1] 1859 1996

$GSS
[1] 1883 1996

$NBCLAT
[1] 1900 1977

$Roper
[1] 1906.0 1967.5

$Time
[1] 1919 1982


> iv<-"black"

> lapply(allpolls,function(poll) {
+   #debugging stub: poll<-abcdfs ; lists the distinct values of `iv` within each individual poll (by id)
+   tapply(poll[[iv]],poll$id,unique) #in some cases NA's appear alongside 0/1
+ }) #NOTE(review): original note said "no zero's anywhere", but the printed output shows 0 for nearly every poll -- confirm what was meant
$ABC
$ABC$`ABC-1981 (df1981:AWP8846.DAT)`
[1] 0 1

$ABC$`ABC-1982 (df1982.5:Awp7598.dat)`
[1] 0 1

$ABC$`ABC-1982 (df1982:ABC7871.DAT)`
[1] 0 1

$ABC$`ABC-1995 (df1995:aw5514.dat)`
[1] 0 1

$ABC$`ABC-2003 (df2003:abcw909.por)`
[1]  0  1 NA

$ABC$`ABC-2005 (df2005.5:aw985.por)`
[1]  1  0 NA

$ABC$`ABC-2005 (df2005:aw980.por)`
[1]  0  1 NA

$ABC$`ABC-2006 (df2006:1015.por)`
[1]  0  1 NA


$ANES
$ANES$`ANES-1948`
[1]  0  1 NA

$ANES$`ANES-1952`
[1]  0  1 NA

$ANES$`ANES-1954`
[1]  0  1 NA

$ANES$`ANES-1956`
[1] 0 1

$ANES$`ANES-1958`
[1] 0 1

$ANES$`ANES-1960`
[1] 0 1

$ANES$`ANES-1962`
[1]  1  0 NA

$ANES$`ANES-1964`
[1] 0 1

$ANES$`ANES-1966`
[1] 0 1

$ANES$`ANES-1968`
[1] 0 1

$ANES$`ANES-1970`
[1] 0 1

$ANES$`ANES-1972`
[1]  0  1 NA

$ANES$`ANES-1974`
[1]  0  1 NA

$ANES$`ANES-1976`
[1]  0  1 NA

$ANES$`ANES-1978`
[1]  0  1 NA

$ANES$`ANES-1980`
[1]  0  1 NA

$ANES$`ANES-1982`
[1]  0  1 NA

$ANES$`ANES-1984`
[1]  0  1 NA

$ANES$`ANES-1986`
[1]  0  1 NA

$ANES$`ANES-1988`
[1]  0  1 NA

$ANES$`ANES-1990`
[1]  0  1 NA

$ANES$`ANES-1992`
[1]  0  1 NA

$ANES$`ANES-1994`
[1]  0 NA  1

$ANES$`ANES-1996`
[1]  1  0 NA

$ANES$`ANES-1998`
[1]  0  1 NA

$ANES$`ANES-2000`
[1]  0  1 NA

$ANES$`ANES-2002`
[1] NA  1  0

$ANES$`ANES-2004`
[1]  0  1 NA

$ANES$`ANES-2008`
[1]  0  1 NA

$ANES$`ANES-2012`
[1]  1  0 NA


$CBS
$CBS$`CBS-1994 (df1994:CJUN94A.DAT)`
[1]  1  0 NA

$CBS$`CBS-2012 (df2012:c201207a.por)`
[1]  0  1 NA


$Gallup
$Gallup$`Gallup-1955 (df1955:aipo0550.dat)`
[1]  0  1 NA

$Gallup$`Gallup-1956 (df1956:562.dat)`
[1]  1  0 NA

$Gallup$`Gallup-1957 (df1957:aipo0588.dat)`
[1]  0  1 NA

$Gallup$`Gallup-1960 (df1960:625.dat)`
[1]  0  1 NA

$Gallup$`Gallup-1965 (df1965.5:709.dat)`
[1] 0 1

$Gallup$`Gallup-1965 (df1965.7:AIPO0716.dat)`
[1] 0 1

$Gallup$`Gallup-1965 (df1965:704.dat)`
[1] 0 1

$Gallup$`Gallup-1966 (df1966:aipo0729.dat)`
[1] 0 1

$Gallup$`Gallup-1967 (df1967.5:aipo0749.dat)`
[1]  0  1 NA

$Gallup$`Gallup-1967 (df1967:aipo0746.dat)`
[1] 0 1

$Gallup$`Gallup-1968 (df1968:aipo0757.dat)`
[1] 0 1

$Gallup$`Gallup-1969 (df1969.5:773.dat)`
[1] 0 1

$Gallup$`Gallup-1969 (df1969.7:aipo6955.dat)`
[1] 1

$Gallup$`Gallup-1969 (df1969:aipo0774.dat,aipo0774S.dat)`
[1] 0 1

$Gallup$`Gallup-1971 (df1971:839.dat)`
[1] 0 1

$Gallup$`Gallup-1972 (df1972.5:860.dat)`
[1] 0 1

$Gallup$`Gallup-1972 (df1972.7:861.dat)`
[1] 0 1

$Gallup$`Gallup-1972 (df1972:846.dat)`
[1] 0 1

$Gallup$`Gallup-1976 (df1976:949.dat)`
[1] 0 1

$Gallup$`Gallup-1978 (df1978:995.dat)`
[1] 0 1

$Gallup$`Gallup-1981 (df1981:1168.dat)`
[1] 0 1

$Gallup$`Gallup-1982 (df1982.5:1189.dat)`
[1] 0 1

$Gallup$`Gallup-1982 (df1982:aipo1202.dat)`
[1] 0 1

$Gallup$`Gallup-1985 (df1985.5:g85064.por)`
[1]  1  0 NA

$Gallup$`Gallup-1985 (df1985:1248.dat)`
[1] 0 1

$Gallup$`Gallup-1986 (df1986:1261.dat)`
[1] 0 1

$Gallup$`Gallup-1988 (df1988:Ai875.dat)`
[1]  1  0 NA

$Gallup$`Gallup-1989 (df1989.5:89135.dat)`
[1]  0  1 NA

$Gallup$`Gallup-1989 (df1989:Oct89nii.dat)`
[1]  0  1 NA

$Gallup$`Gallup-1990 (df1990:g922017.DAT)`
[1] 0 1

$Gallup$`Gallup-1991 (df1991.5:G122021.dat)`
[1]  0  1 NA

$Gallup$`Gallup-1991 (df1991.7:g222006.por)`
[1]  0  1 NA

$Gallup$`Gallup-1991 (df1991:g222002.dat)`
[1]  0  1 NA

$Gallup$`Gallup-1992 (df1992.5:g205115.por)`
[1]  1  0 NA

$Gallup$`Gallup-1992 (df1992.7:g322014.por)`
[1]  0  1 NA

$Gallup$`Gallup-1992 (df1992:life1992.dat)`
[1]  0  1 NA

$Gallup$`Gallup-1993 (df1993.5:A322052.DAT)`
[1]  0 NA  1

$Gallup$`Gallup-1993 (df1993:g422017B.DAT)`
[1]  0  1 NA

$Gallup$`Gallup-1994 (df1994.5:A0807010.DAT)`
[1]  0  1 NA

$Gallup$`Gallup-1994 (df1994.9:A0807018.DAT)`
[1]  0  1 NA

$Gallup$`Gallup-1994 (df1994.95:A422039.DAT)`
[1]  0  1 NA

$Gallup$`Gallup-1994 (df1994:A0807020.DAT)`
[1]  0  1 NA

$Gallup$`Gallup-1995 (df1995.5:A018A.DAT)`
[1]  0  1 NA

$Gallup$`Gallup-1995 (df1995.7:9509012.DAT)`
[1]  0  1 NA

$Gallup$`Gallup-1995 (df1995:5001020.DAT)`
[1]  0  1 NA

$Gallup$`Gallup-1996 (df1996:A9605012.DAT)`
[1]  0  1 NA

$Gallup$`Gallup-1997 (df1997.5:g9707017.DAT)`
[1]  0  1 NA

$Gallup$`Gallup-1997 (df1997:9708018.dat)`
[1]  0  1 NA

$Gallup$`Gallup-1998 (df1998.5:A9810040.DAT)`
[1]  0  1 NA

$Gallup$`Gallup-1998 (df1998:A9806017.DAT)`
[1]  0  1 NA

$Gallup$`Gallup-1999 (df1999.5:9903015.dat)`
[1]  0  1 NA

$Gallup$`Gallup-1999 (df1999.7:A9906032.dat)`
[1]  0  1 NA

$Gallup$`Gallup-1999 (df1999:9902009.dat)`
[1]  0  1 NA

$Gallup$`Gallup-2000 (df2000.5:g200008.dat)`
[1]  0  1 NA

$Gallup$`Gallup-2000 (df2000.7:a200010.dat)`
[1]  0  1 NA

$Gallup$`Gallup-2000 (df2000.9:g200025.dat)`
[1]  0 NA  1

$Gallup$`Gallup-2000 (df2000:a200026.dat)`
[1]  0  1 NA

$Gallup$`Gallup-2001 (df2001.5:a200121.dat)`
[1]  0  1 NA

$Gallup$`Gallup-2001 (df2001:g2001-06.por)`
[1]  0 NA  1

$Gallup$`Gallup-2002 (df2002:2002_06_21x.por)`
[1]  0  1 NA

$Gallup$`Gallup-2003 (df2003.5:2003_06_09x.por)`
[1]  0  1 NA

$Gallup$`Gallup-2003 (df2003:2003_05_19x.dat)`
[1]  0  1 NA

$Gallup$`Gallup-2004 (df2004:g200418.por)`
[1]  0 NA  1

$Gallup$`Gallup-2005 (df2005:g200524.por)`
[1]  0  1 NA

$Gallup$`Gallup-2006 (df2006.5:g200621.por)`
[1]  0  1 NA

$Gallup$`Gallup-2006 (df2006:g200618.dat)`
[1]  0  1 NA

$Gallup$`Gallup-2007 (df2007:a200721.por)`
[1]  0  1 NA

$Gallup$`Gallup-2008 (df2008:g200824.por)`
[1]  0  1 NA

$Gallup$`Gallup-2009 (df2009.5:usa200921.por)`
[1]  0  1 NA

$Gallup$`Gallup-2009 (df2009:g200911.por)`
[1]  0  1 NA

$Gallup$`Gallup-2010 (df2010.5:usa201021.por)`
[1]  1  0 NA

$Gallup$`Gallup-2010 (df2010:g201009.por)`
[1]  0  1 NA

$Gallup$`Gallup-2011 (df2011.5:g201121.por)`
[1]  0  1 NA

$Gallup$`Gallup-2011 (df2011:g200111.por)`
[1]  0  1 NA

$Gallup$`Gallup-2012 (df2012.5:g201208.por)`
[1]  0  1 NA

$Gallup$`Gallup-2012 (df2012.7:usa201219.por)`
[1]  0  1 NA

$Gallup$`Gallup-2013 (df2013.5:g201321.dat)`
[1]  0 NA  1

$Gallup$`Gallup-2013 (df2013.7:usa201222.dat)`
[1]  0  1 NA

$Gallup$`Gallup-2013 (df2013:g201307.por)`
[1] NA  1  0

$Gallup$`Gallup-2014 (df2014.5:g201407.por)`
[1]  0  1 NA

$Gallup$`Gallup-2014 (df2014:g201411.por)`
[1]  0  1 NA


$GSS
$GSS$`GSS-1972`
[1] 0 1

$GSS$`GSS-1973`
[1] 0 1

$GSS$`GSS-1974`
[1] 0 1

$GSS$`GSS-1975`
[1] 0 1

$GSS$`GSS-1976`
[1] 0 1

$GSS$`GSS-1977`
[1] 0 1

$GSS$`GSS-1978`
[1] 0 1

$GSS$`GSS-1980`
[1] 0 1

$GSS$`GSS-1982`
[1] 0 1

$GSS$`GSS-1983`
[1] 0 1

$GSS$`GSS-1984`
[1] 0 1

$GSS$`GSS-1985`
[1] 0 1

$GSS$`GSS-1986`
[1] 0 1

$GSS$`GSS-1987`
[1] 0 1

$GSS$`GSS-1988`
[1] 0 1

$GSS$`GSS-1989`
[1] 0 1

$GSS$`GSS-1990`
[1] 0 1

$GSS$`GSS-1991`
[1] 0 1

$GSS$`GSS-1993`
[1] 0 1

$GSS$`GSS-1994`
[1] 1 0

$GSS$`GSS-1996`
[1] 0 1

$GSS$`GSS-1998`
[1] 0 1

$GSS$`GSS-2000`
[1] 0 1

$GSS$`GSS-2002`
[1] 0 1

$GSS$`GSS-2004`
[1] 0 1

$GSS$`GSS-2006`
[1] 1 0

$GSS$`GSS-2008`
[1] 0 1

$GSS$`GSS-2010`
[1] 0 1

$GSS$`GSS-2012`
[1] 0 1

$GSS$`GSS-2014`
[1] 0 1


$NBCLAT
$NBCLAT$`LAT-1993 (df1993:Lat322.dat)`
[1] 0 1

$NBCLAT$`LAT-1994 (df1994:Lat328.dat)`
[1] 1 0

$NBCLAT$`LAT-1995 (df1995.5:LAT369.DAT)`
[1] 0 1

$NBCLAT$`NBC-1985 (df1985:n1985may.por)`
[1]  0  1 NA

$NBCLAT$`NBC-1995 (df1995:Nbcw4063.dat)`
[1]  0  1 NA


$Roper
$Roper$`Roper-1971 (df1971:rcom0524.dat)`
[1] 0 1

$Roper$`Roper-1973 (df1973:rprr7309.dat)`
[1]  0  1 NA

$Roper$`Roper-1974 (df1974.5:rprr7401.dat)`
[1]  0  1 NA

$Roper$`Roper-1974 (df1974:rprr7409.dat)`
[1] 0 1

$Roper$`Roper-1975 (df1975.5:rprr7501.dat)`
[1] 0 1

$Roper$`Roper-1975 (df1975:rprr7509.dat)`
[1] 0 1

$Roper$`Roper-1976 (df1976.5:rr7601.por)`
[1] 1 0

$Roper$`Roper-1976 (df1976:rprr7609.dat)`
[1]  0  1 NA

$Roper$`Roper-1977 (df1977:ROPER771.DAT)`
[1]  0  1 NA

$Roper$`Roper-1978 (df1978.5:RR7801.DAT)`
[1]  0  1 NA

$Roper$`Roper-1978 (df1978:ROPER7802.dat)`
[1]  0  1 NA

$Roper$`Roper-1979 (df1979.5:Roper791.dat)`
[1]  0  1 NA

$Roper$`Roper-1979 (df1979:rprr7909.dat)`
[1]  0  1 NA

$Roper$`Roper-1980 (df1980:rprr8001.dat)`
[1]  0  1 NA

$Roper$`Roper-1981 (df1981.5:rprr8101.dat)`
[1]  0  1 NA

$Roper$`Roper-1981 (df1981:rprr8102.dat)`
[1]  0  1 NA

$Roper$`Roper-1982 (df1982.5:rr8201.dat)`
[1]  0  1 NA

$Roper$`Roper-1982 (df1982:rprr8209.dat)`
[1]  0  1 NA

$Roper$`Roper-1983 (df1983.5:rprr8301.dat)`
[1] 0 1

$Roper$`Roper-1983 (df1983:rprr8309.dat)`
[1]  0  1 NA

$Roper$`Roper-1984 (df1984.5:rprr8401.por)`
[1]  0  1 NA

$Roper$`Roper-1984 (df1984:rprr8402.dat)`
[1]  0  1 NA

$Roper$`Roper-1985 (df1985:rr8501.por)`
[1]  0  1 NA

$Roper$`Roper-1986 (df1986:rr8601.dat)`
[1]  0  1 NA

$Roper$`Roper-1987 (df1987:rr8701.DAT)`
[1]  0  1 NA


$Time
$Time$`Time-1989 (df1989.5:y19895406.dat)`
[1]  0  1 NA

$Time$`Time-1989 (df1989:y19985415.por)`
[1]  0  1 NA

$Time$`Time-1993 (df1993:y85212.dat)`
[1]  0  1 NA

$Time$`Time-1994 (df1994:y94012.por)`
[1]  0  1 NA

$Time$`Time-1995 (df1995.5:y95015.por)`
[1]  0 NA  1

$Time$`Time-1995 (df1995:y95002.por)`
[1]  0  1 NA

$Time$`Time-1997 (df1997:y97006.por)`
[1]  0 NA  1

$Time$`Time-2001 (df2001:y200105.dat)`
[1]  0  1 NA

$Time$`Time-2003 (df2003:time01_03.dat)`
[1]  0  1 NA



> black.proportion<-lapply(allpolls,function(poll) {
+   #poll<-abcdfs (commented-out assignment, presumably for stepping through one pollster by hand)
+   tapply(poll[[iv]],poll$id,function(x) {
+     mean(x,na.rm=T)  
+   }) 
+ }) %>% 
+   unlist %>% 
+   sort(decreasing=T)

> #black.proportion holds the mean of iv per poll (NAs dropped), flattened and sorted high to low;
> #author notes: there is an all-black sample in 1969 and a number of polls that oversample black respondents;
> #ANES and GSS also seem to oversample
> 
> iv<-"ed_f"

> lapply(allpolls,function(poll) {
+   #poll<-abcdfs (commented-out assignment, presumably for stepping through one pollster by hand)
+   tapply(poll[[iv]],poll$id,function(x) {
+     0%in%unique(x)
+   }) %>% sum
+ }) #number of polls per pollster in which iv takes the value 0; all zero means no zeros anywhere
$ABC
[1] 0

$ANES
[1] 0

$CBS
[1] 0

$Gallup
[1] 0

$GSS
[1] 0

$NBCLAT
[1] 0

$Roper
[1] 0

$Time
[1] 0


> iv<-"female"

> lapply(allpolls,function(poll) {
+   #poll<-abcdfs (commented-out assignment, presumably for stepping through one pollster by hand)
+   tapply(poll[[iv]],poll$id,table,useNA="ifany") #counts of each value per poll; an NA column appears only where NAs occur
+ }) #check that every poll has both 0 and 1 and that NA counts are small
$ABC
$ABC$`ABC-1981 (df1981:AWP8846.DAT)`

  0   1 
646 887 

$ABC$`ABC-1982 (df1982.5:Awp7598.dat)`

  0   1 
624 884 

$ABC$`ABC-1982 (df1982:ABC7871.DAT)`

   0    1 
1413 1051 

$ABC$`ABC-1995 (df1995:aw5514.dat)`

  0   1 
504 520 

$ABC$`ABC-2003 (df2003:abcw909.por)`

  0   1 
580 553 

$ABC$`ABC-2005 (df2005.5:aw985.por)`

  0   1 
457 547 

$ABC$`ABC-2005 (df2005:aw980.por)`

  0   1 
575 507 

$ABC$`ABC-2006 (df2006:1015.por)`

  0   1 
469 531 


$ANES
$ANES$`ANES-1948`

   0    1 <NA> 
 302  357    3 

$ANES$`ANES-1952`

   0    1 <NA> 
 821  978  100 

$ANES$`ANES-1954`

  0   1 
532 607 

$ANES$`ANES-1956`

  0   1 
787 975 

$ANES$`ANES-1958`

  0   1 
667 783 

$ANES$`ANES-1960`

  0   1 
535 646 

$ANES$`ANES-1962`

  0   1 
583 714 

$ANES$`ANES-1964`

  0   1 
703 868 

$ANES$`ANES-1966`

  0   1 
572 719 

$ANES$`ANES-1968`

  0   1 
684 873 

$ANES$`ANES-1970`

  0   1 
647 860 

$ANES$`ANES-1972`

   0    1 
1168 1537 

$ANES$`ANES-1974`

  0   1 
661 914 

$ANES$`ANES-1976`

   0    1 
 945 1303 

$ANES$`ANES-1978`

   0    1 
1017 1287 

$ANES$`ANES-1980`

  0   1 
695 919 

$ANES$`ANES-1982`

  0   1 
634 784 

$ANES$`ANES-1984`

   0    1 
 989 1268 

$ANES$`ANES-1986`

   0    1 
 952 1224 

$ANES$`ANES-1988`

   0    1 
 872 1168 

$ANES$`ANES-1990`

   0    1 
 895 1085 

$ANES$`ANES-1992`

   0    1 
1158 1327 

$ANES$`ANES-1994`

  0   1 
836 959 

$ANES$`ANES-1996`

  0   1 
768 946 

$ANES$`ANES-1998`

  0   1 
575 706 

$ANES$`ANES-2000`

   0    1 
 790 1017 

$ANES$`ANES-2002`

  0   1 
664 847 

$ANES$`ANES-2004`

  0   1 
566 646 

$ANES$`ANES-2008`

   0    1 
 999 1323 

$ANES$`ANES-2012`

   0    1 
2845 3069 


$CBS
$CBS$`CBS-1994 (df1994:CJUN94A.DAT)`

  0   1 
420 558 

$CBS$`CBS-2012 (df2012:c201207a.por)`

  0   1 
481 608 


$Gallup
$Gallup$`Gallup-1955 (df1955:aipo0550.dat)`

  0   1 
682 715 

$Gallup$`Gallup-1956 (df1956:562.dat)`

   0    1 
 961 1039 

$Gallup$`Gallup-1957 (df1957:aipo0588.dat)`

   0    1 <NA> 
 740  787    1 

$Gallup$`Gallup-1960 (df1960:625.dat)`

   0    1 <NA> 
1419 1564    2 

$Gallup$`Gallup-1965 (df1965.5:709.dat)`

   0    1 
1675 1857 

$Gallup$`Gallup-1965 (df1965.7:AIPO0716.dat)`

   0    1 
1658 1867 

$Gallup$`Gallup-1965 (df1965:704.dat)`

   0    1 
1643 1849 

$Gallup$`Gallup-1966 (df1966:aipo0729.dat)`

   0    1 
1652 1867 

$Gallup$`Gallup-1967 (df1967.5:aipo0749.dat)`

   0    1 <NA> 
1641 1884    1 

$Gallup$`Gallup-1967 (df1967:aipo0746.dat)`

   0    1 
1591 1792 

$Gallup$`Gallup-1968 (df1968:aipo0757.dat)`

  0   1 
738 762 

$Gallup$`Gallup-1969 (df1969.5:773.dat)`

  0   1 
735 726 

$Gallup$`Gallup-1969 (df1969.7:aipo6955.dat)`

  0   1 
478 499 

$Gallup$`Gallup-1969 (df1969:aipo0774.dat,aipo0774S.dat)`

  0   1 
744 759 

$Gallup$`Gallup-1971 (df1971:839.dat)`

  0   1 
768 790 

$Gallup$`Gallup-1972 (df1972.5:860.dat)`

  0   1 
714 747 

$Gallup$`Gallup-1972 (df1972.7:861.dat)`

  0   1 
752 752 

$Gallup$`Gallup-1972 (df1972:846.dat)`

  0   1 
740 773 

$Gallup$`Gallup-1976 (df1976:949.dat)`

  0   1 
762 778 

$Gallup$`Gallup-1978 (df1978:995.dat)`

  0   1 
791 769 

$Gallup$`Gallup-1981 (df1981:1168.dat)`

  0   1 
811 798 

$Gallup$`Gallup-1982 (df1982.5:1189.dat)`

  0   1 
763 748 

$Gallup$`Gallup-1982 (df1982:aipo1202.dat)`

  0   1 
744 742 

$Gallup$`Gallup-1985 (df1985.5:g85064.por)`

  0   1 
508 501 

$Gallup$`Gallup-1985 (df1985:1248.dat)`

  0   1 
767 755 

$Gallup$`Gallup-1986 (df1986:1261.dat)`

  0   1 
786 783 

$Gallup$`Gallup-1988 (df1988:Ai875.dat)`

  0   1 
501 500 

$Gallup$`Gallup-1989 (df1989.5:89135.dat)`

  0   1 
611 624 

$Gallup$`Gallup-1989 (df1989:Oct89nii.dat)`

  0   1 
613 621 

$Gallup$`Gallup-1990 (df1990:g922017.DAT)`

  0   1 
508 522 

$Gallup$`Gallup-1991 (df1991.5:G122021.dat)`

  0   1 
501 502 

$Gallup$`Gallup-1991 (df1991.7:g222006.por)`

  0   1 
602 615 

$Gallup$`Gallup-1991 (df1991:g222002.dat)`

   0    1 <NA> 
 475  510    5 

$Gallup$`Gallup-1992 (df1992.5:g205115.por)`

  0   1 
305 303 

$Gallup$`Gallup-1992 (df1992.7:g322014.por)`

  0   1 
646 741 

$Gallup$`Gallup-1992 (df1992:life1992.dat)`

  0   1 
608 612 

$Gallup$`Gallup-1993 (df1993.5:A322052.DAT)`

  0   1 
491 512 

$Gallup$`Gallup-1993 (df1993:g422017B.DAT)`

  0   1 
622 622 

$Gallup$`Gallup-1994 (df1994.5:A0807010.DAT)`

  0   1 
328 328 

$Gallup$`Gallup-1994 (df1994.9:A0807018.DAT)`

  0   1 
515 496 

$Gallup$`Gallup-1994 (df1994.95:A422039.DAT)`

  0   1 
523 513 

$Gallup$`Gallup-1994 (df1994:A0807020.DAT)`

  0   1 
523 499 

$Gallup$`Gallup-1995 (df1995.5:A018A.DAT)`

  0   1 
498 510 

$Gallup$`Gallup-1995 (df1995.7:9509012.DAT)`

  0   1 
514 497 

$Gallup$`Gallup-1995 (df1995:5001020.DAT)`

  0   1 
489 511 

$Gallup$`Gallup-1996 (df1996:A9605012.DAT)`

  0   1 
497 522 

$Gallup$`Gallup-1997 (df1997.5:g9707017.DAT)`

  0   1 
498 506 

$Gallup$`Gallup-1997 (df1997:9708018.dat)`

  0   1 
398 421 

$Gallup$`Gallup-1998 (df1998.5:A9810040.DAT)`

  0   1 
482 531 

$Gallup$`Gallup-1998 (df1998:A9806017.DAT)`

  0   1 
479 524 

$Gallup$`Gallup-1999 (df1999.5:9903015.dat)`

  0   1 
491 530 

$Gallup$`Gallup-1999 (df1999.7:A9906032.dat)`

  0   1 
484 532 

$Gallup$`Gallup-1999 (df1999:9902009.dat)`

  0   1 
508 546 

$Gallup$`Gallup-2000 (df2000.5:g200008.dat)`

  0   1 
499 546 

$Gallup$`Gallup-2000 (df2000.7:a200010.dat)`

   0    1 <NA> 
 478  541    7 

$Gallup$`Gallup-2000 (df2000.9:g200025.dat)`

   0    1 <NA> 
 491  527    3 

$Gallup$`Gallup-2000 (df2000:a200026.dat)`

   0    1 <NA> 
 487  528    5 

$Gallup$`Gallup-2001 (df2001.5:a200121.dat)`

   0    1 <NA> 
 482  526    3 

$Gallup$`Gallup-2001 (df2001:g2001-06.por)`

   0    1 <NA> 
 491  520    5 

$Gallup$`Gallup-2002 (df2002:2002_06_21x.por)`

   0    1 <NA> 
 491  522    7 

$Gallup$`Gallup-2003 (df2003.5:2003_06_09x.por)`

   0    1 <NA> 
 499  527    3 

$Gallup$`Gallup-2003 (df2003:2003_05_19x.dat)`

   0    1 <NA> 
 478  532    4 

$Gallup$`Gallup-2004 (df2004:g200418.por)`

   0    1 <NA> 
 478  520    4 

$Gallup$`Gallup-2005 (df2005:g200524.por)`

   0    1 <NA> 
 482  519    3 

$Gallup$`Gallup-2006 (df2006.5:g200621.por)`

   0    1 <NA> 
 480  517    5 

$Gallup$`Gallup-2006 (df2006:g200618.dat)`

   0    1 <NA> 
 483  527    3 

$Gallup$`Gallup-2007 (df2007:a200721.por)`

   0    1 <NA> 
 480  518    9 

$Gallup$`Gallup-2008 (df2008:g200824.por)`

  0   1 
407 415 

$Gallup$`Gallup-2009 (df2009.5:usa200921.por)`

  0   1 
519 498 

$Gallup$`Gallup-2009 (df2009:g200911.por)`

  0   1 
506 505 

$Gallup$`Gallup-2010 (df2010.5:usa201021.por)`

  0   1 
528 509 

$Gallup$`Gallup-2010 (df2010:g201009.por)`

  0   1 
507 513 

$Gallup$`Gallup-2011 (df2011.5:g201121.por)`

  0   1 
506 506 

$Gallup$`Gallup-2011 (df2011:g200111.por)`

  0   1 
511 509 

$Gallup$`Gallup-2012 (df2012.5:g201208.por)`

  0   1 
517 487 

$Gallup$`Gallup-2012 (df2012.7:usa201219.por)`

  0   1 
517 498 

$Gallup$`Gallup-2013 (df2013.5:g201321.dat)`

  0   1 
468 563 

$Gallup$`Gallup-2013 (df2013.7:usa201222.dat)`

  0   1 
532 506 

$Gallup$`Gallup-2013 (df2013:g201307.por)`

  0   1 
805 724 

$Gallup$`Gallup-2014 (df2014.5:g201407.por)`

  0   1 
526 501 

$Gallup$`Gallup-2014 (df2014:g201411.por)`

   0    1 <NA> 
 630  610   12 


$GSS
$GSS$`GSS-1972`

  0   1 
807 806 

$GSS$`GSS-1973`

  0   1 
701 803 

$GSS$`GSS-1974`

  0   1 
691 793 

$GSS$`GSS-1975`

  0   1 
670 820 

$GSS$`GSS-1976`

  0   1 
669 830 

$GSS$`GSS-1977`

  0   1 
693 837 

$GSS$`GSS-1978`

  0   1 
643 889 

$GSS$`GSS-1980`

  0   1 
641 827 

$GSS$`GSS-1982`

   0    1 
 779 1081 

$GSS$`GSS-1983`

  0   1 
690 909 

$GSS$`GSS-1984`

  0   1 
598 875 

$GSS$`GSS-1985`

  0   1 
688 846 

$GSS$`GSS-1986`

  0   1 
621 849 

$GSS$`GSS-1987`

   0    1 
 778 1041 

$GSS$`GSS-1988`

  0   1 
638 843 

$GSS$`GSS-1989`

  0   1 
660 877 

$GSS$`GSS-1990`

  0   1 
604 768 

$GSS$`GSS-1991`

  0   1 
636 881 

$GSS$`GSS-1993`

  0   1 
685 921 

$GSS$`GSS-1994`

   0    1 
1290 1702 

$GSS$`GSS-1996`

   0    1 
1285 1619 

$GSS$`GSS-1998`

   0    1 
1232 1600 

$GSS$`GSS-2000`

   0    1 
1229 1588 

$GSS$`GSS-2002`

   0    1 
1228 1537 

$GSS$`GSS-2004`

   0    1 
1280 1532 

$GSS$`GSS-2006`

   0    1 
2003 2507 

$GSS$`GSS-2008`

   0    1 
 930 1093 

$GSS$`GSS-2010`

   0    1 
 891 1153 

$GSS$`GSS-2012`

   0    1 
 886 1088 

$GSS$`GSS-2014`

   0    1 
1141 1397 


$NBCLAT
$NBCLAT$`LAT-1993 (df1993:Lat322.dat)`

  0   1 
715 776 

$NBCLAT$`LAT-1994 (df1994:Lat328.dat)`

  0   1 
711 805 

$NBCLAT$`LAT-1995 (df1995.5:LAT369.DAT)`

  0   1 
693 733 

$NBCLAT$`NBC-1985 (df1985:n1985may.por)`

  0   1 
761 836 

$NBCLAT$`NBC-1995 (df1995:Nbcw4063.dat)`

  0   1 
701 764 


$Roper
$Roper$`Roper-1971 (df1971:rcom0524.dat)`

  0   1 
698 801 

$Roper$`Roper-1973 (df1973:rprr7309.dat)`

  0   1 
517 746 

$Roper$`Roper-1974 (df1974.5:rprr7401.dat)`

  0   1 
773 997 

$Roper$`Roper-1974 (df1974:rprr7409.dat)`

   0    1 
 953 1045 

$Roper$`Roper-1975 (df1975.5:rprr7501.dat)`

   0    1 
 957 1048 

$Roper$`Roper-1975 (df1975:rprr7509.dat)`

   0    1 
 956 1051 

$Roper$`Roper-1976 (df1976.5:rr7601.por)`

   0    1 
 949 1052 

$Roper$`Roper-1976 (df1976:rprr7609.dat)`

   0    1 
 942 1060 

$Roper$`Roper-1977 (df1977:ROPER771.DAT)`

   0    1 
 962 1038 

$Roper$`Roper-1978 (df1978.5:RR7801.DAT)`

   0    1 
 940 1061 

$Roper$`Roper-1978 (df1978:ROPER7802.dat)`

   0    1 
 941 1061 

$Roper$`Roper-1979 (df1979.5:Roper791.dat)`

   0    1 
 909 1034 

$Roper$`Roper-1979 (df1979:rprr7909.dat)`

   0    1 
 912 1034 

$Roper$`Roper-1980 (df1980:rprr8001.dat)`

   0    1 
 906 1018 

$Roper$`Roper-1981 (df1981.5:rprr8101.dat)`

   0    1 <NA> 
 905 1026    1 

$Roper$`Roper-1981 (df1981:rprr8102.dat)`

   0    1 
 929 1045 

$Roper$`Roper-1982 (df1982.5:rr8201.dat)`

   0    1 <NA> 
 935 1051    2 

$Roper$`Roper-1982 (df1982:rprr8209.dat)`

   0    1 
 942 1058 

$Roper$`Roper-1983 (df1983.5:rprr8301.dat)`

   0    1 
 942 1058 

$Roper$`Roper-1983 (df1983:rprr8309.dat)`

   0    1 
 947 1053 

$Roper$`Roper-1984 (df1984.5:rprr8401.por)`

   0    1 
 940 1060 

$Roper$`Roper-1984 (df1984:rprr8402.dat)`

   0    1 
 939 1061 

$Roper$`Roper-1985 (df1985:rr8501.por)`

   0    1 
 928 1048 

$Roper$`Roper-1986 (df1986:rr8601.dat)`

   0    1 
 941 1039 

$Roper$`Roper-1987 (df1987:rr8701.DAT)`

   0    1 
 952 1045 


$Time
$Time$`Time-1989 (df1989.5:y19895406.dat)`

  0   1 
506 506 

$Time$`Time-1989 (df1989:y19985415.por)`

   0    1 <NA> 
 215  209   80 

$Time$`Time-1993 (df1993:y85212.dat)`

  0   1 
250 250 

$Time$`Time-1994 (df1994:y94012.por)`

  0   1 
404 404 

$Time$`Time-1995 (df1995.5:y95015.por)`

  0   1 
400 400 

$Time$`Time-1995 (df1995:y95002.por)`

  0   1 
500 500 

$Time$`Time-1997 (df1997:y97006.por)`

  0   1 
511 513 

$Time$`Time-2001 (df2001:y200105.dat)`

  0   1 
507 508 

$Time$`Time-2003 (df2003:time01_03.dat)`

  0   1 
503 608 



> lapply(allpolls,function(poll) {
+   #poll<-abcdfs (commented-out assignment, presumably for stepping through one pollster by hand)
+   missing.polls<-tapply(poll[[iv]],poll$id,
+                         function(x) sum(!is.na(x))==0) #TRUE when a poll has no non-NA value of iv
+   which(missing.polls)
+ }) #an empty result for every pollster means no poll is missing iv entirely
$ABC
named integer(0)

$ANES
named integer(0)

$CBS
named integer(0)

$Gallup
named integer(0)

$GSS
named integer(0)

$NBCLAT
named integer(0)

$Roper
named integer(0)

$Time
named integer(0)


> ############################################################# 
> ############################################################# 
> 
> #save all the loaded polls to a file that the next script loads;
> #note that save() writes the RData format despite the .RDS file name, so it must be read with load(), not readRDS()
> setwd(filesdir)

> save(
+   allpolls,
+   file="01po_grouped.RDS"
+ )
[1] "######"
[1] "Running:"
[1] "02_prep.R"

> #########################################################
> #########################################################
> 
> #clear workspace; NOTE(review): rm(list=ls()) removes every object in the current environment, including anything a calling script defined there
> rm(list=ls())

> #load packages (plyr is attached before dplyr); NOTE(review): require() returns FALSE with a warning instead of stopping when a package is missing
> require(stringr)

> require(plyr)

> require(dplyr)

> require(zoo)

> require(data.table)

> require(tidyr)

> require(rprojroot)

> #set dirs: find the project root via the _rapol.Rproj marker file, then move into its code folder
> rootdir<-find_root(
+   criterion=has_file('_rapol.Rproj')
+ )

> codedir<-file.path(rootdir,"code")

> setwd(codedir); dir()
[1] "01_publicopinion" "02_voting"        "03_dind"          "dirs.R"          
[5] "runeverything.R" 

> source('dirs.R')

> #helper functions are sourced from pcodedir (presumably defined by dirs.R - confirm); list its contents first
> setwd(pcodedir); dir()
 [1] "01_group.R"                "02_prep.R"                 "03_summarize.R"           
 [4] "04_summarize_output.R"     "05_regmods.R"              "06_regmods_output.R"      
 [7] "07_predict.R"              "08_predict_output.R"       "09_extra_output.R"        
[10] "10_RandR_summarize.R"      "11_RandR_model.R"          "12_RandR_modeloutput.R"   
[13] "functions.R"               "getcode.R"                 "getinfo2.R"               
[16] "readpoll_functions.R"      "XX_predict.R"              "XX_predict_output.R"      
[19] "XX_predict_outputTrends.R" "XX_predictscribbles.R"     "XX_regmods.R"             
[22] "XX_regmods_output.R"       "XX_runall.R"               "XX_scanmeta.R"            
[25] "XXdirs.R"                 

> source('readpoll_functions.R')

> source('getinfo2.R')

> #readme.csv(x): read the CSV file at path x with strings kept as character, drop any column named X (presumably the row-name column left by write.csv - confirm), and return the data frame
> readme.csv<-function(x) {
+   df<-read.csv(x,stringsAsFactors=F)
+   df$X<-NULL
+   return(df)
+ }

> ############################################################# 
> ############################################################# 
> 
> #LOAD
> 
> #load all the polls: move to filesdir, where 01po_grouped.RDS is stored, and list its contents
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> load("01po_grouped.RDS")

> #the load() above restores allpolls; stack its data frames into one (rbind.fill fills columns absent from a data frame with NA) and sort rows by poll id
> fulldf<-rbind.fill(allpolls)

> fulldf<-fulldf[order(fulldf$id),]

> ##########################################################
> ##########################################################
> 
> #PREP IV'S
> 
> ###respondent id
> #each row is a respondent, so number the rows 1..nrow in their current order
> fulldf$respid<-1:nrow(fulldf)

> ###gender, recoded from the 0/1 female indicator
> ###(1=male, 2=female); femaleinfo masks rows where female is NA, so those rows keep gender NA
> femaleinfo<-!is.na(fulldf$female)

> fulldf$gender<-NA

> fulldf$gender[fulldf$female==0 & femaleinfo]<-1

> fulldf$gender[fulldf$female==1 & femaleinfo]<-2

> tableNA(fulldf$gender)
x
     1      2   <NA> 
140561 159836    268 

> ###race
> ###(1=white,2=black,3=other); raceinfo is TRUE only where both the white and the black indicator are non-missing
> raceinfo<-!is.na(fulldf$white) & 
+   !is.na(fulldf$black) 

> names(fulldf)[1:20]
 [1] "id"              "statename"       "state_fips"      "state_alpha2"    "region"         
 [6] "region.num"      "division"        "division.num"    "weights"         "year"           
[11] "birthyear"       "black"           "white"           "age"             "female"         
[16] "ed_f"            "deathpen.abc_p"  "deathpen.abc_np" "deathpen.abc_n"  "preferdp.abc_p" 

> #start with NA everywhere; rows without race info stay NA
> fulldf$race<-NA

> fulldf$race[raceinfo]<-3 #wherever race info exists, default to 3 (other)

> fulldf$race[fulldf$white & raceinfo]<-1 #white; the indicator is used as a logical, which assumes 0/1 coding - TODO confirm

> fulldf$race[fulldf$black & raceinfo]<-2 #black; assigned last, so it overrides white when both indicators are set

> tableNA(fulldf$race)
x
     1      2      3   <NA> 
250989  34210  13567   1899 

> #add race2, a two-category race variable (1=non-black, 2=black), presumably following Sheridan and Gelman (2015); NOTE(review): the citation may be meant as Shirley and Gelman (2015) - confirm
> fulldf$race2<-fulldf$race

> fulldf$race2[fulldf$race==3]<-1 #fold other (3) into 1, so 1 now means non-black

> ###education
> ###(1=HS drop, 2=HS grad, 3=some college, 4=college grad)
> fulldf$ed<-fulldf$ed_f #copied unchanged; presumably ed_f is already on the 1-4 scale above - confirm

> #age group (1=29 or younger, 2=30-44, 3=45-64, 4=65 or older); raw age is kept in age.old; NOTE(review): a non-integer age falling between bins (e.g. 29.5) would stay NA
> fulldf$age.old<-fulldf$age

> ageinfo<-!is.na(fulldf$age)

> fulldf$age<-NA

> fulldf$age[fulldf$age.old<=29 & ageinfo]<-1 

> fulldf$age[fulldf$age.old>=30 & fulldf$age.old<=44 & ageinfo]<-2 

> fulldf$age[fulldf$age.old>=45 & fulldf$age.old<=64 & ageinfo]<-3

> fulldf$age[fulldf$age.old>=65 & ageinfo]<-4

> tableNA(fulldf$age)
x
    1     2     3     4  <NA> 
62326 87616 90662 52376  7685 

> ###cohort: inspect min, quartiles and max of birthyear before choosing cut points
> quantile(
+   fulldf$birthyear,
+   c(0,0.25,0.5,0.75,1),
+   na.rm=T
+ )
  0%  25%  50%  75% 100% 
1853 1925 1943 1957 1996 

> #cut points chosen roughly by quartile and by meaningful dates
> #(1=1925 or earlier, 2=1926-1944, 3=1945-1964, 4=1965 or later); NOTE(review): a fractional birth year falling between bins (e.g. 1944.5) would stay NA - confirm whether such values occur
> cohortinfo<-!is.na(fulldf$birthyear)

> fulldf$cohort<-NA

> fulldf$cohort[fulldf$birthyear<=1925 & 
+                 cohortinfo]<-1 

> fulldf$cohort[fulldf$birthyear>=1926 & 
+                 fulldf$birthyear<=1944 & 
+                 cohortinfo]<-2 

> fulldf$cohort[fulldf$birthyear>=1945 & 
+                 fulldf$birthyear<=1964 & 
+                 cohortinfo]<-3

> fulldf$cohort[fulldf$birthyear>=1965 & 
+                 cohortinfo]<-4

> tableNA(fulldf$cohort) #cohorts are less evenly balanced than quartiles would be; that is the cost of isolating what is presumably the mass-incarceration generation
x
     1      2      3      4   <NA> 
 77322  77178 101181  38724   6260 

> ###poll-level variables
> ###year: number of respondents per survey year, with an NA count
> tableNA(fulldf$year)
x
 1948  1952  1954  1955  1956  1957  1958  1960  1962  1964  1965  1966  1967  1968  1969 
  662  1899  1139  1397  3762  1528  1450  4166  1297  1571 10549  4810  6909  3057  3941 
 1970  1971  1972  1973  1974  1975  1976  1977  1978  1979  1980  1981  1982  1983  1984 
 1507  3057  8796  2767  6827  5502  9290  3530  9399  3889  5006  7048 14235  5599  7730 
 1985  1986  1987  1988  1989  1990  1991  1992  1993  1994  1995  1996  1997  1998  1999 
 7638  7195  3816  4522  5522  4382  4727  5700  5844 11814  8734  5637  2847  6129  3091 
 2000  2001  2002  2003  2004  2005  2006  2007  2008  2009  2010  2011  2012  2013  2014 
 8736  3042  5296  4287  5026  3090  7525  1007  5167  2028  4101  2032 10996  3598  4817 
 <NA> 
    0 

> ###poll: number of respondents per poll id
> tableNA(fulldf$id)
x
                  ABC-1981 (df1981:AWP8846.DAT) 
                                           1533 
                ABC-1982 (df1982.5:Awp7598.dat) 
                                           1508 
                  ABC-1982 (df1982:ABC7871.DAT) 
                                           2464 
                   ABC-1995 (df1995:aw5514.dat) 
                                           1024 
                  ABC-2003 (df2003:abcw909.por) 
                                           1133 
                  ABC-2005 (df2005.5:aw985.por) 
                                           1004 
                    ABC-2005 (df2005:aw980.por) 
                                           1082 
                     ABC-2006 (df2006:1015.por) 
                                           1000 
                                      ANES-1948 
                                            662 
                                      ANES-1952 
                                           1899 
                                      ANES-1954 
                                           1139 
                                      ANES-1956 
                                           1762 
                                      ANES-1958 
                                           1450 
                                      ANES-1960 
                                           1181 
                                      ANES-1962 
                                           1297 
                                      ANES-1964 
                                           1571 
                                      ANES-1966 
                                           1291 
                                      ANES-1968 
                                           1557 
                                      ANES-1970 
                                           1507 
                                      ANES-1972 
                                           2705 
                                      ANES-1974 
                                           1575 
                                      ANES-1976 
                                           2248 
                                      ANES-1978 
                                           2304 
                                      ANES-1980 
                                           1614 
                                      ANES-1982 
                                           1418 
                                      ANES-1984 
                                           2257 
                                      ANES-1986 
                                           2176 
                                      ANES-1988 
                                           2040 
                                      ANES-1990 
                                           1980 
                                      ANES-1992 
                                           2485 
                                      ANES-1994 
                                           1795 
                                      ANES-1996 
                                           1714 
                                      ANES-1998 
                                           1281 
                                      ANES-2000 
                                           1807 
                                      ANES-2002 
                                           1511 
                                      ANES-2004 
                                           1212 
                                      ANES-2008 
                                           2322 
                                      ANES-2012 
                                           5914 
                  CBS-1994 (df1994:CJUN94A.DAT) 
                                            978 
                 CBS-2012 (df2012:c201207a.por) 
                                           1089 
              Gallup-1955 (df1955:aipo0550.dat) 
                                           1397 
                   Gallup-1956 (df1956:562.dat) 
                                           2000 
              Gallup-1957 (df1957:aipo0588.dat) 
                                           1528 
                   Gallup-1960 (df1960:625.dat) 
                                           2985 
                 Gallup-1965 (df1965.5:709.dat) 
                                           3532 
            Gallup-1965 (df1965.7:AIPO0716.dat) 
                                           3525 
                   Gallup-1965 (df1965:704.dat) 
                                           3492 
              Gallup-1966 (df1966:aipo0729.dat) 
                                           3519 
            Gallup-1967 (df1967.5:aipo0749.dat) 
                                           3526 
              Gallup-1967 (df1967:aipo0746.dat) 
                                           3383 
              Gallup-1968 (df1968:aipo0757.dat) 
                                           1500 
                 Gallup-1969 (df1969.5:773.dat) 
                                           1461 
            Gallup-1969 (df1969.7:aipo6955.dat) 
                                            977 
Gallup-1969 (df1969:aipo0774.dat,aipo0774S.dat) 
                                           1503 
                   Gallup-1971 (df1971:839.dat) 
                                           1558 
                 Gallup-1972 (df1972.5:860.dat) 
                                           1461 
                 Gallup-1972 (df1972.7:861.dat) 
                                           1504 
                   Gallup-1972 (df1972:846.dat) 
                                           1513 
                   Gallup-1976 (df1976:949.dat) 
                                           1540 
                   Gallup-1978 (df1978:995.dat) 
                                           1560 
                  Gallup-1981 (df1981:1168.dat) 
                                           1609 
                Gallup-1982 (df1982.5:1189.dat) 
                                           1511 
              Gallup-1982 (df1982:aipo1202.dat) 
                                           1486 
              Gallup-1985 (df1985.5:g85064.por) 
                                           1009 
                  Gallup-1985 (df1985:1248.dat) 
                                           1522 
                  Gallup-1986 (df1986:1261.dat) 
                                           1569 
                 Gallup-1988 (df1988:Ai875.dat) 
                                           1001 
               Gallup-1989 (df1989.5:89135.dat) 
                                           1235 
              Gallup-1989 (df1989:Oct89nii.dat) 
                                           1234 
               Gallup-1990 (df1990:g922017.DAT) 
                                           1030 
             Gallup-1991 (df1991.5:G122021.dat) 
                                           1003 
             Gallup-1991 (df1991.7:g222006.por) 
                                           1217 
               Gallup-1991 (df1991:g222002.dat) 
                                            990 
             Gallup-1992 (df1992.5:g205115.por) 
                                            608 
             Gallup-1992 (df1992.7:g322014.por) 
                                           1387 
              Gallup-1992 (df1992:life1992.dat) 
                                           1220 
             Gallup-1993 (df1993.5:A322052.DAT) 
                                           1003 
              Gallup-1993 (df1993:g422017B.DAT) 
                                           1244 
            Gallup-1994 (df1994.5:A0807010.DAT) 
                                            656 
            Gallup-1994 (df1994.9:A0807018.DAT) 
                                           1011 
            Gallup-1994 (df1994.95:A422039.DAT) 
                                           1036 
              Gallup-1994 (df1994:A0807020.DAT) 
                                           1022 
               Gallup-1995 (df1995.5:A018A.DAT) 
                                           1008 
             Gallup-1995 (df1995.7:9509012.DAT) 
                                           1011 
               Gallup-1995 (df1995:5001020.DAT) 
                                           1000 
              Gallup-1996 (df1996:A9605012.DAT) 
                                           1019 
            Gallup-1997 (df1997.5:g9707017.DAT) 
                                           1004 
               Gallup-1997 (df1997:9708018.dat) 
                                            819 
            Gallup-1998 (df1998.5:A9810040.DAT) 
                                           1013 
              Gallup-1998 (df1998:A9806017.DAT) 
                                           1003 
             Gallup-1999 (df1999.5:9903015.dat) 
                                           1021 
            Gallup-1999 (df1999.7:A9906032.dat) 
                                           1016 
               Gallup-1999 (df1999:9902009.dat) 
                                           1054 
             Gallup-2000 (df2000.5:g200008.dat) 
                                           1045 
             Gallup-2000 (df2000.7:a200010.dat) 
                                           1026 
             Gallup-2000 (df2000.9:g200025.dat) 
                                           1021 
               Gallup-2000 (df2000:a200026.dat) 
                                           1020 
             Gallup-2001 (df2001.5:a200121.dat) 
                                           1011 
              Gallup-2001 (df2001:g2001-06.por) 
                                           1016 
           Gallup-2002 (df2002:2002_06_21x.por) 
                                           1020 
         Gallup-2003 (df2003.5:2003_06_09x.por) 
                                           1029 
           Gallup-2003 (df2003:2003_05_19x.dat) 
                                           1014 
               Gallup-2004 (df2004:g200418.por) 
                                           1002 
               Gallup-2005 (df2005:g200524.por) 
                                           1004 
             Gallup-2006 (df2006.5:g200621.por) 
                                           1002 
               Gallup-2006 (df2006:g200618.dat) 
                                           1013 
               Gallup-2007 (df2007:a200721.por) 
                                           1007 
               Gallup-2008 (df2008:g200824.por) 
                                            822 
           Gallup-2009 (df2009.5:usa200921.por) 
                                           1017 
               Gallup-2009 (df2009:g200911.por) 
                                           1011 
           Gallup-2010 (df2010.5:usa201021.por) 
                                           1037 
               Gallup-2010 (df2010:g201009.por) 
                                           1020 
             Gallup-2011 (df2011.5:g201121.por) 
                                           1012 
               Gallup-2011 (df2011:g200111.por) 
                                           1020 
             Gallup-2012 (df2012.5:g201208.por) 
                                           1004 
           Gallup-2012 (df2012.7:usa201219.por) 
                                           1015 
             Gallup-2013 (df2013.5:g201321.dat) 
                                           1031 
           Gallup-2013 (df2013.7:usa201222.dat) 
                                           1038 
               Gallup-2013 (df2013:g201307.por) 
                                           1529 
             Gallup-2014 (df2014.5:g201407.por) 
                                           1027 
               Gallup-2014 (df2014:g201411.por) 
                                           1252 
                                       GSS-1972 
                                           1613 
                                       GSS-1973 
                                           1504 
                                       GSS-1974 
                                           1484 
                                       GSS-1975 
                                           1490 
                                       GSS-1976 
                                           1499 
                                       GSS-1977 
                                           1530 
                                       GSS-1978 
                                           1532 
                                       GSS-1980 
                                           1468 
                                       GSS-1982 
                                           1860 
                                       GSS-1983 
                                           1599 
                                       GSS-1984 
                                           1473 
                                       GSS-1985 
                                           1534 
                                       GSS-1986 
                                           1470 
                                       GSS-1987 
                                           1819 
                                       GSS-1988 
                                           1481 
                                       GSS-1989 
                                           1537 
                                       GSS-1990 
                                           1372 
                                       GSS-1991 
                                           1517 
                                       GSS-1993 
                                           1606 
                                       GSS-1994 
                                           2992 
                                       GSS-1996 
                                           2904 
                                       GSS-1998 
                                           2832 
                                       GSS-2000 
                                           2817 
                                       GSS-2002 
                                           2765 
                                       GSS-2004 
                                           2812 
                                       GSS-2006 
                                           4510 
                                       GSS-2008 
                                           2023 
                                       GSS-2010 
                                           2044 
                                       GSS-2012 
                                           1974 
                                       GSS-2014 
                                           2538 
                   LAT-1993 (df1993:Lat322.dat) 
                                           1491 
                   LAT-1994 (df1994:Lat328.dat) 
                                           1516 
                 LAT-1995 (df1995.5:LAT369.DAT) 
                                           1426 
                 NBC-1985 (df1985:n1985may.por) 
                                           1597 
                 NBC-1995 (df1995:Nbcw4063.dat) 
                                           1465 
               Roper-1971 (df1971:rcom0524.dat) 
                                           1499 
               Roper-1973 (df1973:rprr7309.dat) 
                                           1263 
             Roper-1974 (df1974.5:rprr7401.dat) 
                                           1770 
               Roper-1974 (df1974:rprr7409.dat) 
                                           1998 
             Roper-1975 (df1975.5:rprr7501.dat) 
                                           2005 
               Roper-1975 (df1975:rprr7509.dat) 
                                           2007 
               Roper-1976 (df1976.5:rr7601.por) 
                                           2001 
               Roper-1976 (df1976:rprr7609.dat) 
                                           2002 
               Roper-1977 (df1977:ROPER771.DAT) 
                                           2000 
               Roper-1978 (df1978.5:RR7801.DAT) 
                                           2001 
              Roper-1978 (df1978:ROPER7802.dat) 
                                           2002 
             Roper-1979 (df1979.5:Roper791.dat) 
                                           1943 
               Roper-1979 (df1979:rprr7909.dat) 
                                           1946 
               Roper-1980 (df1980:rprr8001.dat) 
                                           1924 
             Roper-1981 (df1981.5:rprr8101.dat) 
                                           1932 
               Roper-1981 (df1981:rprr8102.dat) 
                                           1974 
               Roper-1982 (df1982.5:rr8201.dat) 
                                           1988 
               Roper-1982 (df1982:rprr8209.dat) 
                                           2000 
             Roper-1983 (df1983.5:rprr8301.dat) 
                                           2000 
               Roper-1983 (df1983:rprr8309.dat) 
                                           2000 
             Roper-1984 (df1984.5:rprr8401.por) 
                                           2000 
               Roper-1984 (df1984:rprr8402.dat) 
                                           2000 
                 Roper-1985 (df1985:rr8501.por) 
                                           1976 
                 Roper-1986 (df1986:rr8601.dat) 
                                           1980 
                 Roper-1987 (df1987:rr8701.DAT) 
                                           1997 
             Time-1989 (df1989.5:y19895406.dat) 
                                           1012 
               Time-1989 (df1989:y19985415.por) 
                                            504 
                  Time-1993 (df1993:y85212.dat) 
                                            500 
                  Time-1994 (df1994:y94012.por) 
                                            808 
                Time-1995 (df1995.5:y95015.por) 
                                            800 
                  Time-1995 (df1995:y95002.por) 
                                           1000 
                  Time-1997 (df1997:y97006.por) 
                                           1024 
                 Time-2001 (df2001:y200105.dat) 
                                           1015 
               Time-2003 (df2003:time01_03.dat) 
                                           1111 
                                           <NA> 
                                              0 

> fulldf$pollid<-fulldf$id

> ###state-level
> ###numeric code in order of respective statea2
> tableNA(fulldf$state_alpha2) #only 35 people in Alaska!
x
   AK    AL    AR    AZ    CA    CO    CT    DC    DE    FL    GA    HI    IA    ID    IL 
   35  3813  3760  2577 21206  3613  2747   825   988  9090  6134   109  4406   731  9524 
   IN    KS    KY    LA    MA    MD    ME    MI    MN    MO    MS    MT    NC    ND    NE 
 5746  2027  3057  3251  6009  3863  1065  8202  4197  4677  2190   906  6165   421  2065 
   NH    NJ    NM    NV    NY    OH    OK    OR    PA    RI    SC    SD    TN    TX    UT 
  754  5118   893   662 17325 10303  2028  3595 12109   849  2863   826  4761 12565  1903 
   VA    VT    WA    WI    WV    WY  <NA> 
 6163   388  4244  5144  1373   644 82756 

> a2levels<-
+   names(table(fulldf$state_alpha2)) %>% 
+   sort

> fulldf$state_alpha2<-factor(
+   fulldf$state_alpha2,
+   levels=a2levels
+ )

> tableNA(fulldf$state_alpha2)
x
   AK    AL    AR    AZ    CA    CO    CT    DC    DE    FL    GA    HI    IA    ID    IL 
   35  3813  3760  2577 21206  3613  2747   825   988  9090  6134   109  4406   731  9524 
   IN    KS    KY    LA    MA    MD    ME    MI    MN    MO    MS    MT    NC    ND    NE 
 5746  2027  3057  3251  6009  3863  1065  8202  4197  4677  2190   906  6165   421  2065 
   NH    NJ    NM    NV    NY    OH    OK    OR    PA    RI    SC    SD    TN    TX    UT 
  754  5118   893   662 17325 10303  2028  3595 12109   849  2863   826  4761 12565  1903 
   VA    VT    WA    WI    WV    WY  <NA> 
 6163   388  4244  5144  1373   644 82756 

> # geta2.mrp<-function(a2) {
> #   which(a2levels==a2)
> # }
> # geta2.mrp("MI") #quick function to retrieve numeric coding
> 
> ###region 
> ###(1=NE, 2=MW, 3=South, 4=West)
> tableNA(fulldf$region.num)
x
    1     2     3     4  <NA> 
63325 77125 99174 55755  5286 

> tableNA(fulldf$region)
x
  Midwest Northeast     South      West      <NA> 
    77125     63325     99174     55755      5286 

> fulldf$region<-fulldf$region.num

> #where state_alpha2, present
> fulldf$regionDC<-fulldf$region

> fulldf$regionDC[fulldf$state_alpha2=="DC"]<-5 #extra region

> fulldf$regionDC[is.na(fulldf$state_alpha2)]<-NA

> ###division (1-9)
> tableNA(fulldf$division)
x
East North Central East South Central    Middle Atlantic           Mountain 
             51842              18541              46163              16296 
       New England            Pacific     South Atlantic West North Central 
             15983              38700              50757              24014 
West South Central               <NA> 
             28519               9850 

> fulldf$division<-fulldf$division.num

> #this only useful where state_alpha2
> fulldf$divisionDC<-fulldf$division

> fulldf$divisionDC[fulldf$state_alpha2=="DC"]<-10 #extra division

> fulldf$divisionDC[is.na(fulldf$state_alpha2)]<-NA

> vars<-c(
+   "age",
+   "gender",
+   "year",
+   "region",
+   "division",
+   "state_alpha2"
+ )

> lapply(vars,function(x) class(fulldf[[x]]))
[[1]]
[1] "numeric"

[[2]]
[1] "numeric"

[[3]]
[1] "integer"

[[4]]
[1] "integer"

[[5]]
[1] "integer"

[[6]]
[1] "factor"


> ###dem-based interactions
> tmpdf<-expand.grid(
+   v1=c(
+     "race",
+     "race2"
+   ),
+   v2=c(
+     "age",
+     "gender",
+     "year",
+     "region",
+     "division",
+     "state_alpha2",
+     "ed"
+   ),
+   stringsAsFactors=F
+ )

> tmpseq.i<-1:nrow(tmpdf)

> for(i in tmpseq.i) {
+   print(i)
+   #i<-1
+   thisrow<-tmpdf[i,]
+   newname<-paste0(
+     thisrow$v1,
+     "X",
+     thisrow$v2
+   )
+   #make these all distinct categories
+   fulldf[[newname]]<-paste0(
+     fulldf[[thisrow$v1]],
+     "_",
+     fulldf[[thisrow$v2]]
+   )
+   #should be NA if any of the constituent variables are NA
+   tmp<-is.na(fulldf[[thisrow$v1]]) | is.na(fulldf[[thisrow$v2]])
+   fulldf[[newname]][tmp]<-NA
+ }
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
[1] 11
[1] 12
[1] 13
[1] 14

> ##########################################################
> ##########################################################
> 
> #PREP DV'S/RESHAPE DATA
> 
> #flip the dataset
> #goal is respondent ID-question dataset
> #which I will classify by dimension
> 
> #we will need data.table
> #since this is going to be a very large dataset
> fulldf<-as.data.table(fulldf)

> #ignore all respondent id attributes for now
> #melt using the question vars and resp-id
> idvars<-c("respid") #id

> tmpregex<-"\\_(n|p|np|a|na|m|nm)(t)?$"

> qvars<-names(fulldf)[str_detect(names(fulldf),tmpregex)]

> variable.name<-"qgroup"

> value.name<-"response"

> tmpdf<-melt(
+   fulldf,
+   id.vars=idvars,
+   measure.vars=qvars
+ )

> #trim this dataframe
> #only need responses
> #anything that is NA
> tmpdf<-tmpdf[!is.na(value)]

> #also don't need 't' vars,
> #these can be generated from other info
> tmpdf<-tmpdf[!str_detect(variable,"\\_(pt|mt|at)$")]

> #get question/type
> tmpdf$question<-str_replace(
+   tmpdf$variable,
+   "^(.*)\\_.*$",
+   "\\1"
+ )

> tmpdf$type<-str_replace(
+   tmpdf$variable,
+   "^.*\\_(.*)$",
+   "\\1"
+ )

> tmpdf$variable<-NULL #not needed

> #get dimension
> tmpdf$dimension<-NA

> tmprows<-tmpdf$type%in%c("p","np")

> tmpdf$dimension[tmprows]<-"punitive"

> tmprows<-tmpdf$type%in%c("a","na")

> tmpdf$dimension[tmprows]<-"anxiety"

> tmprows<-tmpdf$type%in%c("m","nm")

> tmpdf$dimension[tmprows]<-"mistrust"

> #order
> tmplevels<-c("p","np","a","na","m","nm","n")

> tmpdf$type<-factor(
+   tmpdf$type,
+   levels=tmplevels
+ )

> tmpdf<-tmpdf[order(question,type),]

> #hacky way to get dimensions
> require(zoo) 

> sum(is.na(tmpdf$dimension))
[1] 485096

> tmpdf$dimension<-na.locf(tmpdf$dimension)

> sum(is.na(tmpdf$dimension))
[1] 0

> #all assigned correctly
> tmp<-tmpdf[,unique(dimension),.(question)]

> table(tmp$question)

       adeqprotect.time            canrehab.lat  cjustconfidence.gallup 
                      1                       1                       1 
    cjustconfidence.gss        confidence.roper   courtsconvconf.gallup 
                      1                       1                       1 
     courtsharsh.gallup         courtsharsh.gss        crimebill.gallup 
                      1                       1                       1 
       crimespend.roper            deathpen.abc         deathpen.gallup 
                      1                       1                       1 
           deathpen.gss          deathpen.roper           deathpen.time 
                      1                       1                       1 
       deterrent.gallup           harsher.roper    moreimppunish.gallup 
                      1                       1                       1 
     moreprisons.gallup         moreprisons.lat            natcrime.gss 
                      1                       1                       1 
           natcrimy.gss            natcrimz.gss             police.anes 
                      1                       1                       1 
 policebrutality.gallup policeconfidence.gallup    policehonesty.gallup 
                      1                       1                       1 
      policehonesty.nbc    policeprotect.gallup    policerespect.gallup 
                      1                       1                       1 
           preferdp.abc         preferdp.gallup           raccused.anes 
                      1                       1                       1 
       rehabilitate.lat          spendmore.anes            sppolice.gss 
                      1                       1                       1 
          useforce.anes         worryvictim.cbs        worryvictim.time 
                      1                       1                       1 

> #now, remove dim info in response
> #we have dim info in a grouping var
> head(tmpdf)
   respid value         question   type dimension
    <int> <int>           <char> <fctr>    <char>
1: 292892     0 adeqprotect.time      a   anxiety
2: 292893     0 adeqprotect.time      a   anxiety
3: 292894     1 adeqprotect.time      a   anxiety
4: 292895     0 adeqprotect.time      a   anxiety
5: 292896     0 adeqprotect.time      a   anxiety
6: 292897     1 adeqprotect.time      a   anxiety

> tmpdf$type %>% unique
[1] a  na n  p  np m  nm
Levels: p np a na m nm n

> tmpdf$type2<-NA

> tmprows<-tmpdf$type%in%c("p","m","a")

> tmpdf$type2[tmprows]<-"aff"

> tmprows<-tmpdf$type%in%c("np","nm","na")

> tmpdf$type2[tmprows]<-"neg"

> tmprows<-tmpdf$type%in%c("n")

> tmpdf$type2[tmprows]<-"neut"

> tmpdf$type<-NULL

> head(tmpdf)
   respid value         question dimension  type2
    <int> <int>           <char>    <char> <char>
1: 292892     0 adeqprotect.time   anxiety    aff
2: 292893     0 adeqprotect.time   anxiety    aff
3: 292894     1 adeqprotect.time   anxiety    aff
4: 292895     0 adeqprotect.time   anxiety    aff
5: 292896     0 adeqprotect.time   anxiety    aff
6: 292897     1 adeqprotect.time   anxiety    aff

> #now, spread to get 
> #respondent.id - question df
> finaldf<-spread(
+   tmpdf,
+   type2,
+   value
+ )

> ##########################################################
> ##########################################################
> 
> #MERGE
> #get respondent info from fulldf
> rootvars<-varsdf$oldname

> intvars<-names(fulldf)[str_detect(names(fulldf),"X")]

> keepvars<-c(rootvars,intvars) %>%
+   unique

> tmp<-keepvars%in%names(fulldf)

> if(sum(!tmp)>0)
+   stop()

> finaldf<-merge(
+   finaldf,
+   fulldf[,keepvars,with=F],
+   by="respid",
+   all.X=T
+ )

> ##########################################################
> ##########################################################
> 
> #ADD QUESTION INFO
> tmpvars<-c("question","alt","deathpen")

> tmpdf<-questionsdf[,tmpvars]

> tmpdf$alt<-as.numeric(tmpdf$alt)

> tmpdf$deathpen<-as.numeric(tmpdf$deathpen)

> finaldf<-merge(
+   finaldf,
+   tmpdf,
+   by="question",
+   all.x=T
+ )

> finaldf
           question respid dimension aff neg neut gender race race2 ed age cohort
1  adeqprotect.time 292892   anxiety   0   1    0      1    1     1  1   1      3
2  adeqprotect.time 298011   anxiety   1   0    0      1    1     1  4   4      2
3  adeqprotect.time 292893   anxiety   0   1    0      1    1     1  1   3      2
4  adeqprotect.time 298012   anxiety   1   0    0      2    2     2  4   1      4
5  adeqprotect.time 292894   anxiety   1   0    0      1    1     1  3   2      3
6  adeqprotect.time 294687   anxiety   1   0    0      2    1     1  2   2      3
7  adeqprotect.time 292895   anxiety   0   1    0      1    1     1  2   2      3
8  adeqprotect.time 298013   anxiety   0   1    0      2    2     2 NA  NA     NA
9  adeqprotect.time 292896   anxiety   0   1    0      1    1     1  3   1      4
10 adeqprotect.time 298014   anxiety   1   0    0      2    2     2  3   2      4
11 adeqprotect.time 292897   anxiety   1   0    0      1    1     1  3   2      3
12 adeqprotect.time 294689   anxiety   0   1    0      1    1     1  4   4      1
13 adeqprotect.time 292898   anxiety   1   0    0      1    2     2  1   1      3
14 adeqprotect.time 298015   anxiety   1   0    0      2    1     1  3  NA      3
15 adeqprotect.time 292899   anxiety   0   1    0      1    1     1  1   3      2
16 adeqprotect.time 298016   anxiety   0   1    0      2    1     1  4   4      2
17 adeqprotect.time 292900   anxiety   1   0    0      1    1     1  2   1      4
18 adeqprotect.time 294691   anxiety   0   1    0      1    1     1  4   2      3
19 adeqprotect.time 292901   anxiety   1   0    0      1    1     1  2  NA     NA
20 adeqprotect.time 298017   anxiety   0   1    0      1    1     1  3  NA      3
21 adeqprotect.time 292902   anxiety   1   0    0      1    1     1  2   1      4
22 adeqprotect.time 298018   anxiety   1   0    0      2    2     2  2   1      4
23 adeqprotect.time 292903   anxiety   0   1    0      1    1     1  4  NA     NA
24 adeqprotect.time 294693   anxiety   0   1    0      2    1     1  2   3      2
25 adeqprotect.time 292904   anxiety   0   1    0      1    1     1  1   4      1
26 adeqprotect.time 298019   anxiety   1   0    0      2    1     1  3   4      2
27 adeqprotect.time 292905   anxiety   1   0    0      1    1     1  3   1      3
28 adeqprotect.time 298020   anxiety   1   0    0      1    1     1  1   1      4
                               pollid state_alpha2 regionDC region division year weights
1  Time-1989 (df1989.5:y19895406.dat)           MA        1      1        1 1989   1.758
2       Time-1997 (df1997:y97006.por)         <NA>       NA      3        5 1997   0.758
3  Time-1989 (df1989.5:y19895406.dat)           MD        3      3        5 1989   1.487
4       Time-1997 (df1997:y97006.por)         <NA>       NA      3        6 1997   1.058
5  Time-1989 (df1989.5:y19895406.dat)           NC        3      3        5 1989   0.915
6       Time-1993 (df1993:y85212.dat)           IA        2      2        4 1993   0.773
7  Time-1989 (df1989.5:y19895406.dat)           MD        3      3        5 1989   0.768
8       Time-1997 (df1997:y97006.por)         <NA>       NA      3        5 1997   1.772
9  Time-1989 (df1989.5:y19895406.dat)           PA        1      1        2 1989   1.071
10      Time-1997 (df1997:y97006.por)         <NA>       NA      3        5 1997   1.158
11 Time-1989 (df1989.5:y19895406.dat)           GA        3      3        5 1989   0.915
12      Time-1993 (df1993:y85212.dat)           OH        2      2        3 1993   0.690
13 Time-1989 (df1989.5:y19895406.dat)           PA        1      1        2 1989   2.546
14      Time-1997 (df1997:y97006.por)         <NA>       NA      3        5 1997   0.725
15 Time-1989 (df1989.5:y19895406.dat)           GA        3      3        5 1989   1.487
16      Time-1997 (df1997:y97006.por)         <NA>       NA      3        5 1997   0.774
17 Time-1989 (df1989.5:y19895406.dat)           NC        3      3        5 1989   0.881
18      Time-1993 (df1993:y85212.dat)           WI        2      2        3 1993   0.672
19 Time-1989 (df1989.5:y19895406.dat)           RI        1      1        1 1989   0.692
20      Time-1997 (df1997:y97006.por)         <NA>       NA      3        5 1997   0.674
21 Time-1989 (df1989.5:y19895406.dat)           NY        1      1        2 1989   0.898
22      Time-1997 (df1997:y97006.por)         <NA>       NA      3        6 1997   2.152
23 Time-1989 (df1989.5:y19895406.dat)           CT        1      1        1 1989   0.626
24      Time-1993 (df1993:y85212.dat)           IN        2      2        3 1993   0.847
25 Time-1989 (df1989.5:y19895406.dat)           NY        1      1        2 1989   1.449
26      Time-1997 (df1997:y97006.por)         <NA>       NA      3        6 1997   0.963
27 Time-1989 (df1989.5:y19895406.dat)           TX        3      3        7 1989   1.174
28      Time-1997 (df1997:y97006.por)         <NA>       NA      3        7 1997   2.354
   raceXage race2Xage raceXgender race2Xgender raceXyear race2Xyear raceXregion race2Xregion
1       1_1       1_1         1_1          1_1    1_1989     1_1989         1_1          1_1
2       1_4       1_4         1_1          1_1    1_1997     1_1997         1_3          1_3
3       1_3       1_3         1_1          1_1    1_1989     1_1989         1_3          1_3
4       2_1       2_1         2_2          2_2    2_1997     2_1997         2_3          2_3
5       1_2       1_2         1_1          1_1    1_1989     1_1989         1_3          1_3
6       1_2       1_2         1_2          1_2    1_1993     1_1993         1_2          1_2
7       1_2       1_2         1_1          1_1    1_1989     1_1989         1_3          1_3
8      <NA>      <NA>         2_2          2_2    2_1997     2_1997         2_3          2_3
9       1_1       1_1         1_1          1_1    1_1989     1_1989         1_1          1_1
10      2_2       2_2         2_2          2_2    2_1997     2_1997         2_3          2_3
11      1_2       1_2         1_1          1_1    1_1989     1_1989         1_3          1_3
12      1_4       1_4         1_1          1_1    1_1993     1_1993         1_2          1_2
13      2_1       2_1         2_1          2_1    2_1989     2_1989         2_1          2_1
14     <NA>      <NA>         1_2          1_2    1_1997     1_1997         1_3          1_3
15      1_3       1_3         1_1          1_1    1_1989     1_1989         1_3          1_3
16      1_4       1_4         1_2          1_2    1_1997     1_1997         1_3          1_3
17      1_1       1_1         1_1          1_1    1_1989     1_1989         1_3          1_3
18      1_2       1_2         1_1          1_1    1_1993     1_1993         1_2          1_2
19     <NA>      <NA>         1_1          1_1    1_1989     1_1989         1_1          1_1
20     <NA>      <NA>         1_1          1_1    1_1997     1_1997         1_3          1_3
21      1_1       1_1         1_1          1_1    1_1989     1_1989         1_1          1_1
22      2_1       2_1         2_2          2_2    2_1997     2_1997         2_3          2_3
23     <NA>      <NA>         1_1          1_1    1_1989     1_1989         1_1          1_1
24      1_3       1_3         1_2          1_2    1_1993     1_1993         1_2          1_2
25      1_4       1_4         1_1          1_1    1_1989     1_1989         1_1          1_1
26      1_4       1_4         1_2          1_2    1_1997     1_1997         1_3          1_3
27      1_1       1_1         1_1          1_1    1_1989     1_1989         1_3          1_3
28      1_1       1_1         1_1          1_1    1_1997     1_1997         1_3          1_3
   raceXdivision race2Xdivision raceXstate_alpha2 race2Xstate_alpha2 raceXed race2Xed alt
1            1_1            1_1              1_MA               1_MA     1_1      1_1  NA
2            1_5            1_5              <NA>               <NA>     1_4      1_4  NA
3            1_5            1_5              1_MD               1_MD     1_1      1_1  NA
4            2_6            2_6              <NA>               <NA>     2_4      2_4  NA
5            1_5            1_5              1_NC               1_NC     1_3      1_3  NA
6            1_4            1_4              1_IA               1_IA     1_2      1_2  NA
7            1_5            1_5              1_MD               1_MD     1_2      1_2  NA
8            2_5            2_5              <NA>               <NA>    <NA>     <NA>  NA
9            1_2            1_2              1_PA               1_PA     1_3      1_3  NA
10           2_5            2_5              <NA>               <NA>     2_3      2_3  NA
11           1_5            1_5              1_GA               1_GA     1_3      1_3  NA
12           1_3            1_3              1_OH               1_OH     1_4      1_4  NA
13           2_2            2_2              2_PA               2_PA     2_1      2_1  NA
14           1_5            1_5              <NA>               <NA>     1_3      1_3  NA
15           1_5            1_5              1_GA               1_GA     1_1      1_1  NA
16           1_5            1_5              <NA>               <NA>     1_4      1_4  NA
17           1_5            1_5              1_NC               1_NC     1_2      1_2  NA
18           1_3            1_3              1_WI               1_WI     1_4      1_4  NA
19           1_1            1_1              1_RI               1_RI     1_2      1_2  NA
20           1_5            1_5              <NA>               <NA>     1_3      1_3  NA
21           1_2            1_2              1_NY               1_NY     1_2      1_2  NA
22           2_6            2_6              <NA>               <NA>     2_2      2_2  NA
23           1_1            1_1              1_CT               1_CT     1_4      1_4  NA
24           1_3            1_3              1_IN               1_IN     1_2      1_2  NA
25           1_2            1_2              1_NY               1_NY     1_1      1_1  NA
26           1_6            1_6              <NA>               <NA>     1_3      1_3  NA
27           1_7            1_7              1_TX               1_TX     1_3      1_3  NA
28           1_7            1_7              <NA>               <NA>     1_1      1_1  NA
   deathpen
1        NA
2        NA
3        NA
4        NA
5        NA
6        NA
7        NA
8        NA
9        NA
10       NA
11       NA
12       NA
13       NA
14       NA
15       NA
16       NA
17       NA
18       NA
19       NA
20       NA
21       NA
22       NA
23       NA
24       NA
25       NA
26       NA
27       NA
28       NA
 [ reached 'max' / getOption("max.print") -- omitted 485068 rows ]

> #add question interactions
> #each new column is the race code (race or race2) joined by "_" to a
> #question-level variable; paste0 renders a missing component as the
> #literal text "NA" (e.g. "1_NA") rather than a true NA
> #with question
> finaldf$raceXquestion<-paste0(finaldf$race,"_",finaldf$question)

> finaldf$race2Xquestion<-paste0(finaldf$race2,"_",finaldf$question)

> #q's w/ alternative
> finaldf$raceXalt<-paste0(finaldf$race,"_",finaldf$alt)

> finaldf$race2Xalt<-paste0(finaldf$race2,"_",finaldf$alt)

> #q's w/ deathpen
> finaldf$raceXdeathpen<-paste0(finaldf$race,"_",finaldf$deathpen)

> #race2 is crossed with deathpen here (it was previously crossed with alt,
> #which just duplicated race2Xalt)
> finaldf$race2Xdeathpen<-paste0(finaldf$race2,"_",finaldf$deathpen)

> #extra three-way interactions, used by the models w/ xrace interactions:
> #race by division by year, and race by ed by year
> #(components are joined with "_", same as the two-way columns)
> finaldf[["raceXdivisionXyear"]]<-paste0(
+   finaldf[["race"]],"_",finaldf[["division"]],"_",finaldf[["year"]]
+ )

> finaldf[["raceXedXyear"]]<-paste0(
+   finaldf[["race"]],"_",finaldf[["ed"]],"_",finaldf[["year"]]
+ )

> ##########################################################
> ##########################################################
> 
> #FINALIZE/SAVE OUT
> 
> #how many respondents are in here twice or more
> tmptable<-table(finaldf$respid)

> 100*sum(tmptable==1)/length(tmptable)
[1] 53.66738

> 100*sum(tmptable==2)/length(tmptable)
[1] 19.84007

> 100*sum(tmptable==3)/length(tmptable)
[1] 23.41457

> 100*sum(tmptable==4)/length(tmptable) 
[1] 2.656344

> 100*sum(tmptable>5)/length(tmptable) 
[1] 0

> #how many respondents from original don't give any info?
> tmp<-!fulldf$respid%in%finaldf$respid

> missing.respondents<-fulldf$respid[tmp] %>%
+   unique

> length(missing.respondents)
[1] 25550

> #take a look
> tmp<-missing.respondents[1:10]

> fulldf[fulldf$respid%in%tmp,]
                                 id statename state_fips state_alpha2 region region.num
                             <char>    <char>      <int>       <fctr>  <int>      <int>
 1: ABC-1982 (df1982.5:Awp7598.dat)      <NA>         NA         <NA>      4          4
 2: ABC-1982 (df1982.5:Awp7598.dat)      <NA>         NA         <NA>      2          2
 3: ABC-1982 (df1982.5:Awp7598.dat)      <NA>         NA         <NA>      2          2
 4: ABC-1982 (df1982.5:Awp7598.dat)      <NA>         NA         <NA>      2          2
    division division.num weights  year birthyear black white   age female  ed_f
       <int>        <int>   <num> <int>     <num> <int> <int> <num>  <int> <int>
 1:        9            9    1.61  1982      1906     0     1     4      1     1
 2:        3            3    0.81  1982      1922     0     1     3      1     2
 3:        4            4    0.81  1982      1926     0     1     3      1     2
 4:        3            3    0.83  1982      1919     0     1     3      0     3
    deathpen.abc_p deathpen.abc_np deathpen.abc_n preferdp.abc_p preferdp.abc_np
             <int>           <int>          <int>          <int>           <int>
 1:             NA              NA             NA             NA              NA
 2:             NA              NA             NA             NA              NA
 3:             NA              NA             NA             NA              NA
 4:             NA              NA             NA             NA              NA
    preferdp.abc_n deathpen.abc_pt preferdp.abc_pt police.anes_m police.anes_nm police.anes_n
             <int>           <int>           <int>         <int>          <int>         <int>
 1:             NA              NA              NA            NA             NA            NA
 2:             NA              NA              NA            NA             NA            NA
 3:             NA              NA              NA            NA             NA            NA
 4:             NA              NA              NA            NA             NA            NA
    useforce.anes_p useforce.anes_np useforce.anes_n raccused.anes_p raccused.anes_np
              <int>            <int>           <int>           <int>            <int>
 1:              NA               NA              NA              NA               NA
 2:              NA               NA              NA              NA               NA
 3:              NA               NA              NA              NA               NA
 4:              NA               NA              NA              NA               NA
    raccused.anes_n spendmore.anes_a spendmore.anes_na spendmore.anes_n police.anes_mt
              <int>            <int>             <int>            <int>          <int>
 1:              NA               NA                NA               NA             NA
 2:              NA               NA                NA               NA             NA
 3:              NA               NA                NA               NA             NA
 4:              NA               NA                NA               NA             NA
    useforce.anes_pt raccused.anes_pt spendmore.anes_at worryvictim.cbs_a worryvictim.cbs_na
               <int>            <int>             <int>             <int>              <int>
 1:               NA               NA                NA                NA                 NA
 2:               NA               NA                NA                NA                 NA
 3:               NA               NA                NA                NA                 NA
 4:               NA               NA                NA                NA                 NA
    worryvictim.cbs_n worryvictim.cbs_at moreimppunish.gallup_p moreimppunish.gallup_np
                <int>              <int>                  <int>                   <int>
 1:                NA                 NA                     NA                      NA
 2:                NA                 NA                     NA                      NA
 3:                NA                 NA                     NA                      NA
 4:                NA                 NA                     NA                      NA
    moreimppunish.gallup_n deathpen.gallup_p deathpen.gallup_np deathpen.gallup_n
                     <int>             <int>              <int>             <int>
 1:                     NA                NA                 NA                NA
 2:                     NA                NA                 NA                NA
 3:                     NA                NA                 NA                NA
 4:                     NA                NA                 NA                NA
    policerespect.gallup_m policerespect.gallup_nm policerespect.gallup_n
                     <int>                   <int>                  <int>
 1:                     NA                      NA                     NA
 2:                     NA                      NA                     NA
 3:                     NA                      NA                     NA
 4:                     NA                      NA                     NA
    courtsharsh.gallup_p courtsharsh.gallup_np courtsharsh.gallup_n policebrutality.gallup_m
                   <int>                 <int>                <int>                    <int>
 1:                   NA                    NA                   NA                       NA
 2:                   NA                    NA                   NA                       NA
 3:                   NA                    NA                   NA                       NA
 4:                   NA                    NA                   NA                       NA
    policebrutality.gallup_nm policebrutality.gallup_n deterrent.gallup_p deterrent.gallup_np
                        <int>                    <int>              <int>               <int>
 1:                        NA                       NA                 NA                  NA
 2:                        NA                       NA                 NA                  NA
 3:                        NA                       NA                 NA                  NA
 4:                        NA                       NA                 NA                  NA
    deterrent.gallup_n preferdp.gallup_p preferdp.gallup_np preferdp.gallup_n
                 <int>             <int>              <int>             <int>
 1:                 NA                NA                 NA                NA
 2:                 NA                NA                 NA                NA
 3:                 NA                NA                 NA                NA
 4:                 NA                NA                 NA                NA
    courtsconvconf.gallup_m courtsconvconf.gallup_nm courtsconvconf.gallup_n
                      <int>                    <int>                   <int>
 1:                      NA                       NA                      NA
 2:                      NA                       NA                      NA
 3:                      NA                       NA                      NA
 4:                      NA                       NA                      NA
    policeprotect.gallup_m policeprotect.gallup_nm policeprotect.gallup_n
                     <int>                   <int>                  <int>
 1:                     NA                      NA                     NA
 2:                     NA                      NA                     NA
 3:                     NA                      NA                     NA
 4:                     NA                      NA                     NA
    moreprisons.gallup_p moreprisons.gallup_np moreprisons.gallup_n cjustconfidence.gallup_m
                   <int>                 <int>                <int>                    <int>
 1:                   NA                    NA                   NA                       NA
 2:                   NA                    NA                   NA                       NA
 3:                   NA                    NA                   NA                       NA
 4:                   NA                    NA                   NA                       NA
    cjustconfidence.gallup_nm cjustconfidence.gallup_n policeconfidence.gallup_m
                        <int>                    <int>                     <int>
 1:                        NA                       NA                        NA
 2:                        NA                       NA                        NA
 3:                        NA                       NA                        NA
 4:                        NA                       NA                        NA
    policeconfidence.gallup_nm policeconfidence.gallup_n crimebill.gallup_p
                         <int>                     <int>              <int>
 1:                         NA                        NA                 NA
 2:                         NA                        NA                 NA
 3:                         NA                        NA                 NA
 4:                         NA                        NA                 NA
    crimebill.gallup_np crimebill.gallup_n policehonesty.gallup_m policehonesty.gallup_nm
                  <int>              <int>                  <int>                   <int>
 1:                  NA                 NA                     NA                      NA
 2:                  NA                 NA                     NA                      NA
 3:                  NA                 NA                     NA                      NA
 4:                  NA                 NA                     NA                      NA
    policehonesty.gallup_n moreimppunish.gallup_pt deathpen.gallup_pt policerespect.gallup_mt
                     <int>                   <int>              <int>                   <int>
 1:                     NA                      NA                 NA                      NA
 2:                     NA                      NA                 NA                      NA
 3:                     NA                      NA                 NA                      NA
 4:                     NA                      NA                 NA                      NA
    courtsharsh.gallup_pt policebrutality.gallup_mt deterrent.gallup_pt preferdp.gallup_pt
                    <int>                     <int>               <int>              <int>
 1:                    NA                        NA                  NA                 NA
 2:                    NA                        NA                  NA                 NA
 3:                    NA                        NA                  NA                 NA
 4:                    NA                        NA                  NA                 NA
    courtsconvconf.gallup_mt policeprotect.gallup_mt moreprisons.gallup_pt
                       <int>                   <int>                 <int>
 1:                       NA                      NA                    NA
 2:                       NA                      NA                    NA
 3:                       NA                      NA                    NA
 4:                       NA                      NA                    NA
    cjustconfidence.gallup_mt policeconfidence.gallup_mt crimebill.gallup_pt
                        <int>                      <int>               <int>
 1:                        NA                         NA                  NA
 2:                        NA                         NA                  NA
 3:                        NA                         NA                  NA
 4:                        NA                         NA                  NA
    policehonesty.gallup_mt deathpen.gss_p deathpen.gss_np deathpen.gss_n natcrime.gss_a
                      <int>          <int>           <int>          <int>          <int>
 1:                      NA             NA              NA             NA             NA
 2:                      NA             NA              NA             NA             NA
 3:                      NA             NA              NA             NA             NA
 4:                      NA             NA              NA             NA             NA
    natcrime.gss_na natcrime.gss_n natcrimy.gss_a natcrimy.gss_na natcrimy.gss_n
              <int>          <int>          <int>           <int>          <int>
 1:              NA             NA             NA              NA             NA
 2:              NA             NA             NA              NA             NA
 3:              NA             NA             NA              NA             NA
 4:              NA             NA             NA              NA             NA
    natcrimz.gss_a natcrimz.gss_na natcrimz.gss_n courtsharsh.gss_p courtsharsh.gss_np
             <int>           <int>          <int>             <int>              <int>
 1:             NA              NA             NA                NA                 NA
 2:             NA              NA             NA                NA                 NA
 3:             NA              NA             NA                NA                 NA
 4:             NA              NA             NA                NA                 NA
    courtsharsh.gss_n sppolice.gss_a sppolice.gss_na sppolice.gss_n cjustconfidence.gss_m
                <int>          <int>           <int>          <int>                 <int>
 1:                NA             NA              NA             NA                    NA
 2:                NA             NA              NA             NA                    NA
 3:                NA             NA              NA             NA                    NA
 4:                NA             NA              NA             NA                    NA
    cjustconfidence.gss_nm cjustconfidence.gss_n deathpen.gss_pt natcrime.gss_at
                     <int>                 <int>           <int>           <int>
 1:                     NA                    NA              NA              NA
 2:                     NA                    NA              NA              NA
 3:                     NA                    NA              NA              NA
 4:                     NA                    NA              NA              NA
    natcrimy.gss_at natcrimz.gss_at courtsharsh.gss_pt sppolice.gss_at cjustconfidence.gss_mt
              <int>           <int>              <int>           <int>                  <int>
 1:              NA              NA                 NA              NA                     NA
 2:              NA              NA                 NA              NA                     NA
 3:              NA              NA                 NA              NA                     NA
 4:              NA              NA                 NA              NA                     NA
    policehonesty.nbc_m policehonesty.nbc_nm policehonesty.nbc_n canrehab.lat_p
                  <int>                <int>               <int>          <int>
 1:                  NA                   NA                  NA             NA
 2:                  NA                   NA                  NA             NA
 3:                  NA                   NA                  NA             NA
 4:                  NA                   NA                  NA             NA
    canrehab.lat_np canrehab.lat_n rehabilitate.lat_p rehabilitate.lat_np rehabilitate.lat_n
              <int>          <int>              <int>               <int>              <int>
 1:              NA             NA                 NA                  NA                 NA
 2:              NA             NA                 NA                  NA                 NA
 3:              NA             NA                 NA                  NA                 NA
 4:              NA             NA                 NA                  NA                 NA
    moreprisons.lat_p moreprisons.lat_np moreprisons.lat_n policehonesty.nbc_mt
                <int>              <int>             <int>                <int>
 1:                NA                 NA                NA                   NA
 2:                NA                 NA                NA                   NA
 3:                NA                 NA                NA                   NA
 4:                NA                 NA                NA                   NA
    canrehab.lat_pt rehabilitate.lat_pt moreprisons.lat_pt crimespend.roper_a
              <int>               <int>              <int>              <int>
 1:              NA                  NA                 NA                 NA
 2:              NA                  NA                 NA                 NA
 3:              NA                  NA                 NA                 NA
 4:              NA                  NA                 NA                 NA
    crimespend.roper_na crimespend.roper_n confidence.roper_m confidence.roper_nm
                  <int>              <int>              <int>               <int>
 1:                  NA                 NA                 NA                  NA
 2:                  NA                 NA                 NA                  NA
 3:                  NA                 NA                 NA                  NA
 4:                  NA                 NA                 NA                  NA
    confidence.roper_n deathpen.roper_p deathpen.roper_np deathpen.roper_n harsher.roper_p
                 <int>            <int>             <int>            <int>           <int>
 1:                 NA               NA                NA               NA              NA
 2:                 NA               NA                NA               NA              NA
 3:                 NA               NA                NA               NA              NA
 4:                 NA               NA                NA               NA              NA
    harsher.roper_np harsher.roper_n crimespend.roper_at confidence.roper_mt
               <int>           <int>               <int>               <int>
 1:               NA              NA                  NA                  NA
 2:               NA              NA                  NA                  NA
 3:               NA              NA                  NA                  NA
 4:               NA              NA                  NA                  NA
    deathpen.roper_pt harsher.roper_pt deathpen.time_p deathpen.time_np deathpen.time_n
                <int>            <int>           <int>            <int>           <int>
 1:                NA               NA              NA               NA              NA
 2:                NA               NA              NA               NA              NA
 3:                NA               NA              NA               NA              NA
 4:                NA               NA              NA               NA              NA
    worryvictim.time_a worryvictim.time_na worryvictim.time_n adeqprotect.time_a
                 <int>               <int>              <int>              <int>
 1:                 NA                  NA                 NA                 NA
 2:                 NA                  NA                 NA                 NA
 3:                 NA                  NA                 NA                 NA
 4:                 NA                  NA                 NA                 NA
    adeqprotect.time_na adeqprotect.time_n deathpen.time_pt worryvictim.time_at
                  <int>              <int>            <int>               <int>
 1:                  NA                 NA               NA                  NA
 2:                  NA                 NA               NA                  NA
 3:                  NA                 NA               NA                  NA
 4:                  NA                 NA               NA                  NA
    adeqprotect.time_at respid gender  race race2    ed age.old cohort
                  <int>  <int>  <num> <num> <num> <int>   <num>  <num>
 1:                  NA   2224      2     1     1     1      76      1
 2:                  NA   2225      2     1     1     2      60      1
 3:                  NA   2226      2     1     1     2      56      2
 4:                  NA   2227      1     1     1     3      63      1
                             pollid regionDC divisionDC raceXage race2Xage raceXgender
                             <char>    <num>      <num>   <char>    <char>      <char>
 1: ABC-1982 (df1982.5:Awp7598.dat)       NA         NA      1_4       1_4         1_2
 2: ABC-1982 (df1982.5:Awp7598.dat)       NA         NA      1_3       1_3         1_2
 3: ABC-1982 (df1982.5:Awp7598.dat)       NA         NA      1_3       1_3         1_2
 4: ABC-1982 (df1982.5:Awp7598.dat)       NA         NA      1_3       1_3         1_1
    race2Xgender raceXyear race2Xyear raceXregion race2Xregion raceXdivision race2Xdivision
          <char>    <char>     <char>      <char>       <char>        <char>         <char>
 1:          1_2    1_1982     1_1982         1_4          1_4           1_9            1_9
 2:          1_2    1_1982     1_1982         1_2          1_2           1_3            1_3
 3:          1_2    1_1982     1_1982         1_2          1_2           1_4            1_4
 4:          1_1    1_1982     1_1982         1_2          1_2           1_3            1_3
    raceXstate_alpha2 race2Xstate_alpha2 raceXed race2Xed
               <char>             <char>  <char>   <char>
 1:              <NA>               <NA>     1_1      1_1
 2:              <NA>               <NA>     1_2      1_2
 3:              <NA>               <NA>     1_2      1_2
 4:              <NA>               <NA>     1_3      1_3
 [ reached getOption("max.print") -- omitted 6 rows ]

> #from how many different polls?
> tmp<-fulldf$respid%in%missing.respondents

> polls.wmissing<-fulldf$pollid[tmp]

> table(polls.wmissing) %>% sort(decreasing=T)
polls.wmissing
                           ANES-1986                            ANES-1988 
                                2176                                 2040 
                           ANES-1990                            ANES-1952 
                                1980                                 1899 
                           ANES-1956                            ANES-1980 
                                1762                                 1614 
                           ANES-1964                            ANES-1958 
                                1571                                 1450 
                           ANES-1982                            ANES-1962 
                                1418                                 1297 
                           ANES-1998                            ANES-1960 
                                1281                                 1181 
                           ANES-1954      ABC-1982 (df1982.5:Awp7598.dat) 
                                1139                                  754 
                           ANES-1948   Gallup-1992 (df1992.7:g322014.por) 
                                 662                                  659 
      NBC-1995 (df1995:Nbcw4063.dat)                            ANES-1972 
                                 632                                  437 
    Gallup-1997 (df1997:9708018.dat)                            ANES-1984 
                                 432                                  330 
                           ANES-2002     Time-2003 (df2003:time01_03.dat) 
                                 172                                  101 
         ABC-2005 (df2005:aw980.por)     Time-1989 (df1989:y19985415.por) 
                                  84                                   80 
                           ANES-1968    Gallup-1955 (df1955:aipo0550.dat) 
                                  36                                   26 
  Roper-1975 (df1975.5:rprr7501.dat)                            ANES-1978 
                                  22                                   19 
   Gallup-1957 (df1957:aipo0588.dat)                            ANES-1966 
                                  19                                   18 
                           ANES-2012         Gallup-1956 (df1956:562.dat) 
                                  15                                   15 
                           ANES-2004   Roper-1974 (df1974.5:rprr7401.dat) 
                                  14                                   13 
                           ANES-1994         Gallup-1960 (df1960:625.dat) 
                                  12                                   12 
    Roper-1974 (df1974:rprr7409.dat)     Roper-1975 (df1975:rprr7509.dat) 
                                  12                                   12 
    Roper-1978 (df1978.5:RR7801.DAT)     Roper-1976 (df1976.5:rr7601.por) 
                                  12                                   11 
    Roper-1973 (df1973:rprr7309.dat)   Roper-1979 (df1979.5:Roper791.dat) 
                                  10                                   10 
                           ANES-1974     Roper-1977 (df1977:ROPER771.DAT) 
                                   9                                    9 
  Roper-1981 (df1981.5:rprr8101.dat)                            ANES-1976 
                                   9                                    8 
 Gallup-1969 (df1969.7:aipo6955.dat)     Roper-1981 (df1981:rprr8102.dat) 
                                   8                                    8 
    Roper-1976 (df1976:rprr7609.dat)     Roper-1979 (df1979:rprr7909.dat) 
                                   7                                    7 
    Roper-1982 (df1982:rprr8209.dat)                            ANES-1992 
                                   7                                    6 
    Roper-1982 (df1982.5:rr8201.dat)           ABC-2006 (df2006:1015.por) 
                                   5                                    4 
        Gallup-1972 (df1972:846.dat)       NBC-1985 (df1985:n1985may.por) 
                                   4                                    4 
                           ANES-2008  Gallup-1965 (df1965.7:AIPO0716.dat) 
                                   3                                    3 
   Roper-1978 (df1978:ROPER7802.dat)                            ANES-1996 
                                   3                                    2 
       ABC-2003 (df2003:abcw909.por)        ABC-2005 (df2005.5:aw985.por) 
                                   1                                    1 
   Gallup-1966 (df1966:aipo0729.dat)       Gallup-1969 (df1969.5:773.dat) 
                                   1                                    1 
Gallup-2009 (df2009.5:usa200921.por) 
                                   1 

> #these are people who didn't answer/weren't asked
> #nothing we can do about them, move on
> length(unique(finaldf$respid)) 
[1] 275115

> #our finaldf is about 275,115 respondents
> nrow(finaldf)
[1] 485096

> #if we ignore repeaters, 485,096 obs
> 
> ##########################################################
> ##########################################################
> 
> # OUTPUT A DESCRIPTIVE TABLE
> 
> require(xtable)
Loading required package: xtable
 

> finaldf<-data.table(finaldf)

> #how many respondents
> nrow(finaldf)
[1] 485096

> length(unique(finaldf$respid))
[1] 275115

> length(unique(finaldf$pollid))
[1] 176

> #create a table
> tabdf <- finaldf[
+   ,
+   .(
+     N=.N,
+     white=sum(race==1,na.rm=T),
+     black=sum(race==2,na.rm=T),
+     other=sum(race==3,na.rm=T),
+     range=paste0(min(year),"-",max(year))
+   )
+   ,
+   by=c('question')
+ ]

> #prettify
> tabdf$N<-prettyNum(tabdf$N,big.mark=',')

> tabdf$white<-prettyNum(tabdf$white,big.mark=',')

> tabdf$black<-prettyNum(tabdf$black,big.mark=',')

> tabdf$other<-prettyNum(tabdf$other,big.mark=',')

> names(tabdf)<-c('Question','Respondents','White','Black','Other','Period')

> tabdf_latex<- xtable(
+   tabdf,
+   align=c('l','l','|','l','l','l','l','l'),
+   caption='Information about Questions in the Public Opinion Sample',
+   type='latex'
+ )

> setwd(outputdir); dir()
 [1] "CleggFig1.pdf"                   "CleggFig2.pdf"                  
 [3] "CleggFig3.pdf"                   "CleggFig4.pdf"                  
 [5] "CleggFig5.pdf"                   "CleggFig6.pdf"                  
 [7] "CleggFig7.pdf"                   "fig_citerate.png"               
 [9] "fig_comparative.csv"             "fig_comparative.png"            
[11] "fig_dind_beoshock.png"           "fig_dind_prefests.csv"          
[13] "fig_dind_prefests.png"           "fig_dind_robests.csv"           
[15] "fig_dind_robests.png"            "fig_dind_welfare.csv"           
[17] "fig_dind_welfare.png"            "fig_map.png"                    
[19] "fig_po_alts.png"                 "fig_po_averages.png"            
[21] "fig_po_averages_elites.png"      "fig_po_averages2.png"           
[23] "fig_po_correlations.csv"         "fig_po_correlations.png"        
[25] "fig_po_diffs.png"                "fig_po_diffs_elites.png"        
[27] "fig_po_diffs2.png"               "fig_po_effectofrace.csv"        
[29] "fig_po_effectofrace.png"         "fig_po_effects.csv"             
[31] "fig_po_effects.png"              "fig_po_protests.csv"            
[33] "fig_po_protests.png"             "fig_po_raw.png"                 
[35] "fig_po_trends.csv"               "fig_po_trends.png"              
[37] "fig_po_trends_blackwhitegap.csv" "fig_po_trends_blackwhitegap.png"
[39] "fig_po_trends_byrace.csv"        "fig_po_trends_byrace.png"       
[41] "fig_po_trends_sparsest.png"      "fig_voting_differences.csv"     
[43] "fig_voting_differences.png"      "fig_voting_levels.csv"          
[45] "fig_voting_levels.png"           "fig_voting_levels_south.csv"    
[47] "fig_voting_levels_south.pdf"     "fig_voting_levels_south.png"    
[49] "fig_voting_presidents.csv"       "fig_voting_presidents.png"      
[51] "fig1_po_averages.pdf"            "fig2_po_trends.csv"             
[53] "fig2_po_trends.pdf"              "fig3_po_averages_elites.pdf"    
[55] "fig4_po_correlations.csv"        "fig4_po_correlations.pdf"       
[57] "fig4_po_correlations.png"        "fig5_voting_levels.csv"         
[59] "fig5_voting_levels.pdf"          "fig6_voting_levels_south.csv"   
[61] "fig6_voting_levels_south.pdf"    "fig7_dind_prefests.csv"         
[63] "fig7_dind_prefests.pdf"          "list_po_questions.tex"          
[65] "list_po_questions_EDIT.tex"      "runeverything_250619.log"       
[67] "runeverything_250620.log"        "runeverything_publicopinion.log"
[69] "tab_conventionalview.tex"        "tab_po_questions.tex"           
[71] "tab_po_questions_EDIT.tex"      

> print(
+   tabdf_latex,
+   include.rownames=F,
+   file='tab_po_questions_EDIT.tex'
+ )

> #also output the full text of these questions
> setwd(datadir); dir()
 [1] "abcdfs.csv"                                 "All_Bills(1947-2017).csv"                  
 [3] "american_violence_data_20241016_201046.csv" "anesdf.csv"                                
 [5] "beodf.csv"                                  "beodf5.csv"                                
 [7] "billsdf_revised.csv"                        "cbcdf.csv"                                 
 [9] "cbsdfs.csv"                                 "citecount.csv"                             
[11] "congressmen_list.csv"                       "conventional.bib"                          
[13] "cpopdf.csv"                                 "Ethnic_Collect_Action.dta"                 
[15] "fulldf_bills.csv"                           "gallupdfs.csv"                             
[17] "gssdf.csv"                                  "Hall_votes.csv"                            
[19] "histpundf_national_220328.csv"              "housevotes_handcoded_revised.csv"          
[21] "HSall_members.csv"                          "HSall_rollcalls_withissues.csv"            
[23] "incrates_subnationalstate.csv"              "lbj1964.csv"                               
[25] "nbclatdfs.csv"                              "prezdf.csv"                                
[27] "questions_fortex.txt"                       "race_riot.dta"                             
[29] "redf.csv"                                   "roperdfs.csv"                              
[31] "tab_longviolence.csv"                       "timedfs.csv"                               
[33] "votes"                                     

> qinfo<-readLines('questions_fortex.txt')

> setwd(outputdir); dir()
 [1] "CleggFig1.pdf"                   "CleggFig2.pdf"                  
 [3] "CleggFig3.pdf"                   "CleggFig4.pdf"                  
 [5] "CleggFig5.pdf"                   "CleggFig6.pdf"                  
 [7] "CleggFig7.pdf"                   "fig_citerate.png"               
 [9] "fig_comparative.csv"             "fig_comparative.png"            
[11] "fig_dind_beoshock.png"           "fig_dind_prefests.csv"          
[13] "fig_dind_prefests.png"           "fig_dind_robests.csv"           
[15] "fig_dind_robests.png"            "fig_dind_welfare.csv"           
[17] "fig_dind_welfare.png"            "fig_map.png"                    
[19] "fig_po_alts.png"                 "fig_po_averages.png"            
[21] "fig_po_averages_elites.png"      "fig_po_averages2.png"           
[23] "fig_po_correlations.csv"         "fig_po_correlations.png"        
[25] "fig_po_diffs.png"                "fig_po_diffs_elites.png"        
[27] "fig_po_diffs2.png"               "fig_po_effectofrace.csv"        
[29] "fig_po_effectofrace.png"         "fig_po_effects.csv"             
[31] "fig_po_effects.png"              "fig_po_protests.csv"            
[33] "fig_po_protests.png"             "fig_po_raw.png"                 
[35] "fig_po_trends.csv"               "fig_po_trends.png"              
[37] "fig_po_trends_blackwhitegap.csv" "fig_po_trends_blackwhitegap.png"
[39] "fig_po_trends_byrace.csv"        "fig_po_trends_byrace.png"       
[41] "fig_po_trends_sparsest.png"      "fig_voting_differences.csv"     
[43] "fig_voting_differences.png"      "fig_voting_levels.csv"          
[45] "fig_voting_levels.png"           "fig_voting_levels_south.csv"    
[47] "fig_voting_levels_south.pdf"     "fig_voting_levels_south.png"    
[49] "fig_voting_presidents.csv"       "fig_voting_presidents.png"      
[51] "fig1_po_averages.pdf"            "fig2_po_trends.csv"             
[53] "fig2_po_trends.pdf"              "fig3_po_averages_elites.pdf"    
[55] "fig4_po_correlations.csv"        "fig4_po_correlations.pdf"       
[57] "fig4_po_correlations.png"        "fig5_voting_levels.csv"         
[59] "fig5_voting_levels.pdf"          "fig6_voting_levels_south.csv"   
[61] "fig6_voting_levels_south.pdf"    "fig7_dind_prefests.csv"         
[63] "fig7_dind_prefests.pdf"          "list_po_questions.tex"          
[65] "list_po_questions_EDIT.tex"      "runeverything_250619.log"       
[67] "runeverything_250620.log"        "runeverything_publicopinion.log"
[69] "tab_conventionalview.tex"        "tab_po_questions.tex"           
[71] "tab_po_questions_EDIT.tex"      

> write(qinfo,'list_po_questions_EDIT.tex')

> ##########################################################
> ##########################################################
> 
> #save out
> setwd(filesdir)

> # rm("fulldf") #confusing me
> # save.image(file="prepped.RData")
> write.csv(
+   finaldf,
+   '01po_dataframe.csv',
+   row.names=F
+ )
[1] "######"
[1] "Running:"
[1] "03_summarize.R"

> #########################################################
> #########################################################
> 
> #clear workspace
> rm(list=ls())

> #load packages
> require(stringr)

> require(plyr)

> require(dplyr)

> require(zoo)

> require(data.table)

> require(tidyr)

> require(rprojroot)

> #extras
> require(boot)

> #set dirs
> rootdir<-find_root(
+   criterion=has_file('_rapol.Rproj')
+ )

> codedir<-file.path(rootdir,"code")

> setwd(codedir); dir()
[1] "01_publicopinion" "02_voting"        "03_dind"          "dirs.R"          
[5] "runeverything.R" 

> source('dirs.R')

> #helper functions
> setwd(pcodedir); dir()
 [1] "01_group.R"                "02_prep.R"                 "03_summarize.R"           
 [4] "04_summarize_output.R"     "05_regmods.R"              "06_regmods_output.R"      
 [7] "07_predict.R"              "08_predict_output.R"       "09_extra_output.R"        
[10] "10_RandR_summarize.R"      "11_RandR_model.R"          "12_RandR_modeloutput.R"   
[13] "functions.R"               "getcode.R"                 "getinfo2.R"               
[16] "readpoll_functions.R"      "XX_predict.R"              "XX_predict_output.R"      
[19] "XX_predict_outputTrends.R" "XX_predictscribbles.R"     "XX_regmods.R"             
[22] "XX_regmods_output.R"       "XX_runall.R"               "XX_scanmeta.R"            
[25] "XXdirs.R"                 

> source('readpoll_functions.R')

> source('getinfo2.R')

> source('functions.R')

> #load data
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> finaldf<-fread(
+   '01po_dataframe.csv'
+ )

> #########################################################
> #########################################################
> 
> #HELPER FUNCTION
> 
> #uses boot, returns y, ymin, ymax
> #for use w/ data.table
> 
> mean_se.boot<-function(x,w,raw=T) {
+   # x<-df$aff
+   # w<-df$weights
+   dist<-boot(
+     data=x,
+     stat=function(x,d) {
+       mean(x[d])
+     },
+     R=1000,
+     weights=w
+   )
+   if(raw==T) {
+     return(dist$t)
+   } else {
+     returndf<-summarize.distribution2(
+       dist$t
+     )
+     returndf$N<-length(x)
+     returndf$pval<-
+       returndf$pval.class<-NA
+     return(returndf)
+   }
+ }

> #########################################################
> #########################################################
> 
> 
> #FOR EACH Q
> #return mean/se, by race
> #since this is weighted, 
> #se should be booted
> questions<-unique(finaldf$question) %>%
+   sort

> tmpseq.i<-seq_along(questions) 

> fulloutput<-lapply(tmpseq.i,function(i) {
+   #i<-9
+   print(
+     paste(
+       i,"of",length(tmpseq.i)
+     )
+   )
+   
+   thisquestion<-questions[i]
+   
+   # #examine
+   # tmp<-finaldf$question==thisquestion &
+   #   finaldf$race==2
+   # thisdf<-finaldf[tmp,]
+   # sum(thisdf$aff)/nrow(thisdf)
+   # sum(thisdf$neg)/nrow(thisdf)
+   
+   #sum(thisdf$aff[thisdf$race==2])/
+   #  sum(thisdf$race==2) #76%
+   
+   #subset
+   thisdf<-finaldf[
+     question==thisquestion & 
+       !is.na(race) &
+       neut==0
+     ]
+   
+   #loop through and return
+   #bootstrapped distribution of means
+   #for each poll and each race
+   loopdf<-expand.grid(
+     pollid=unique(thisdf$pollid),
+     race=unique(thisdf$race),
+     stringsAsFactors=F
+   )
+   loopdf$name<-apply(
+     loopdf,1,paste0,collapse="."
+   )
+   tmpseq.j<-1:nrow(loopdf)
+   fulloutput<-lapply(tmpseq.j,function(j) {
+     #j<-1
+     df<-thisdf[
+       pollid==loopdf$pollid[j] &
+         race==loopdf$race[j]
+       ]
+     if(nrow(df)>0) {
+       returndist<-mean_se.boot(
+         df$aff,
+         df$weights
+       )
+     } else {
+       returndist<-NULL
+     }
+     100 * returndist
+   })
+   names(fulloutput)<-loopdf$name
+   
+   
+   # ############
+   # #METHOD 1
+   # 
+   # #AVERAGES
+   # #for each race,
+   # #put together all boot distributions
+   # #and compute quantiles
+   # races<-unique(thisdf$race) %>%
+   #   sort
+   # tmpseq.j<-seq_along(races)
+   # avgdf<-lapply(tmpseq.j,function(j) {
+   #   #j<-1
+   #   thisrace<-races[j]
+   #   tmp<-str_detect(
+   #     names(fulloutput),
+   #     paste0(thisrace,"$")
+   #   )
+   #   tmpdist<-fulloutput[tmp] %>% 
+   #     unlist %>%
+   #     unname
+   #   returndf<-data.frame(
+   #     quantile(
+   #       tmpdist,
+   #       c(0.5,0.025,0.975)
+   #     ) %>% t
+   #   )
+   #   names(returndf)<-c("mu","mu.min","mu.max")
+   #   returndf$question<-thisquestion
+   #   returndf$race<-thisrace
+   #   returndf
+   # }) %>% rbind.fill
+   # avgdf$dimension<-getcode(
+   #   thisquestion,
+   #   "question",
+   #   "dimension",
+   #   questionsdf
+   # )
+   # 
+   # #to get race diffs
+   # #loop through questions
+   # #and compute black-white difference
+   # #and then compute quantiles
+   # polls<-unique(thisdf$pollid)
+   # tmpseq.j<-seq_along(polls)
+   # diffdist<-lapply(tmpseq.j,function(j) {
+   #   #j<-1
+   #   thispoll<-polls[j]
+   #   tmp<-str_detect(
+   #     names(fulloutput),
+   #     fixed(thispoll)
+   #   )
+   #   tmpoutput<-fulloutput[tmp]
+   #   #we want black-white difference
+   #   whites<-tmpoutput[
+   #     str_detect(
+   #       names(tmpoutput),
+   #       "1$"
+   #     )
+   #     ] %>% unlist %>%
+   #     unname
+   #   blacks<-tmpoutput[
+   #     str_detect(
+   #       names(tmpoutput),
+   #       "2$"
+   #     )
+   #     ] %>% unlist %>% 
+   #     unname
+   #   if(length(whites)==length(blacks)) {
+   #     y<-whites-blacks
+   #   } else {
+   #     y<-NULL
+   #   }
+   #   return(y)
+   # })
+   # tmpdist<-diffdist %>%
+   #   unlist %>%
+   #   unname
+   # diffdf<-data.frame(
+   #   quantile(
+   #     tmpdist,
+   #     c(0.5,0.025,0.975)
+   #   ) %>% t
+   # )
+   # diffdf
+   # names(diffdf)<-c("mu","mu.min","mu.max")
+   # diffdf$question<-thisquestion
+   # diffdf$dimension<-getcode(
+   #   thisquestion,
+   #   "question",
+   #   "dimension",
+   #   questionsdf
+   # )
+   
+   ############
+   #METHOD 2
+   
+   
+   #GET DIST OF MEANS OF MEANS
+   races<-unique(thisdf$race) %>%
+     sort
+   tmpseq.j<-seq_along(races)
+   mydists<-lapply(tmpseq.j,function(j) {
+     #j<-1
+     thisrace<-races[j]
+     tmp<-str_detect(
+       names(fulloutput),
+       paste0(thisrace,"$")
+     )
+     tmpoutput<-fulloutput[tmp]
+     tmpmat<-do.call(cbind,tmpoutput)
+     apply(tmpmat,1,mean)
+   })
+   names(mydists)<-races
+   
+   #AVERAGES
+   avgdf<-lapply(tmpseq.j,function(j) {
+     thisrace<-races[j]
+     thisdist<-mydists[[j]]
+     returndf<-summarize.distribution2(
+       thisdist
+     )
+     returndf$pval<-returndf$pval.class<-NULL
+     returndf$question<-thisquestion
+     returndf$race<-thisrace
+     returndf
+   }) %>% rbind.fill
+   avgdf$dimension<-getcode(
+     thisquestion,
+     "question",
+     "dimension",
+     questionsdf
+   )
+   
+   #DIFFERENCES
+   whites<-mydists[["1"]]
+   blacks<-mydists[["2"]]
+   tmpdist<-whites-blacks
+   diffdf<-summarize.distribution2(
+     tmpdist
+   )
+   diffdf$question<-thisquestion
+   diffdf$dimension<-getcode(
+     thisquestion,
+     "question",
+     "dimension",
+     questionsdf
+   )
+   
+   ############
+   
+   #return all this
+   list(
+     avgdf=avgdf,
+     diffdf=diffdf
+   )
+ })
[1] "1 of 39"
[1] "2 of 39"
[1] "3 of 39"
[1] "4 of 39"
[1] "5 of 39"
[1] "6 of 39"
[1] "7 of 39"
[1] "8 of 39"
[1] "9 of 39"
[1] "10 of 39"
[1] "11 of 39"
[1] "12 of 39"
[1] "13 of 39"
[1] "14 of 39"
[1] "15 of 39"
[1] "16 of 39"
[1] "17 of 39"
[1] "18 of 39"
[1] "19 of 39"
[1] "20 of 39"
[1] "21 of 39"
[1] "22 of 39"
[1] "23 of 39"
[1] "24 of 39"
[1] "25 of 39"
[1] "26 of 39"
[1] "27 of 39"
[1] "28 of 39"
[1] "29 of 39"
[1] "30 of 39"
[1] "31 of 39"
[1] "32 of 39"
[1] "33 of 39"
[1] "34 of 39"
[1] "35 of 39"
[1] "36 of 39"
[1] "37 of 39"
[1] "38 of 39"
[1] "39 of 39"

> #########################################################
> #########################################################
> 
> #save out dfs w/ some info
> 
> ###AVERAGES
> infodf<-finaldf[
+   !is.na(race) &
+     neut==0
+   ,
+   .(N=length(unique(respid)))
+   ,
+   by=c(
+     "question",
+     "race"
+   )
+   ]

> avgdf<-lapply(fulloutput,function(x) x$avgdf) %>% 
+   rbind.fill

> avgdf<-merge(
+   avgdf,
+   infodf
+ )

> setwd(filesdir)

> write.csv(
+   avgdf,
+   "01po_q_avgs.csv",
+   row.names=F
+ )

> ###DIFFERENCES
> infodf<-finaldf[
+   race%in%c(1,2) &
+     !is.na(race) &
+     neut==0
+   ,
+   .(N=length(unique(respid)))
+   ,
+   by=c(
+     "question"
+   )
+   ]

> diffdf<-lapply(fulloutput,function(x) x$diffdf) %>% 
+   rbind.fill

> diffdf<-merge(
+   diffdf,
+   infodf
+ )

> setwd(filesdir)

> write.csv(
+   diffdf,
+   "01po_q_diffs.csv",
+   row.names=F
+ )
[1] "######"
[1] "Running:"
[1] "04_summarize_output.R"

> #########################################################
> #########################################################
> 
> #clear workspace
> rm(list=ls())

> #load packages
> require(stringr)

> require(plyr)

> require(dplyr)

> require(zoo)

> require(data.table)

> require(tidyr)

> require(rprojroot)

> #set dirs
> rootdir<-find_root(
+   criterion=has_file('_rapol.Rproj')
+ )

> codedir<-file.path(rootdir,"code")

> setwd(codedir); dir()
[1] "01_publicopinion" "02_voting"        "03_dind"          "dirs.R"          
[5] "runeverything.R" 

> source('dirs.R')

> #helper functions
> setwd(pcodedir); dir()
 [1] "01_group.R"                "02_prep.R"                 "03_summarize.R"           
 [4] "04_summarize_output.R"     "05_regmods.R"              "06_regmods_output.R"      
 [7] "07_predict.R"              "08_predict_output.R"       "09_extra_output.R"        
[10] "10_RandR_summarize.R"      "11_RandR_model.R"          "12_RandR_modeloutput.R"   
[13] "functions.R"               "getcode.R"                 "getinfo2.R"               
[16] "readpoll_functions.R"      "XX_predict.R"              "XX_predict_output.R"      
[19] "XX_predict_outputTrends.R" "XX_predictscribbles.R"     "XX_regmods.R"             
[22] "XX_regmods_output.R"       "XX_runall.R"               "XX_scanmeta.R"            
[25] "XXdirs.R"                 

> source('readpoll_functions.R')

> source('getinfo2.R')

> source('functions.R')

> #########################################################
> #########################################################
> 
> #GATHER DATA/COMBINE
> #load data
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> #AVERAGES
> avgdf<-read.csv(
+   '01po_q_avgs.csv',
+   stringsAsFactors=F
+ )

> #DEPRECATED
> # tmpdf<-read.csv(
> #   'q_avghat.csv',
> #   stringsAsFactors=F
> # )
> # tmpdf$question<-paste0(
> #   tmpdf$dimension,".sum"
> # )
> # #don't need all.sum
> # tmp<-tmpdf$question=="all.sum"
> # tmpdf<-tmpdf[!tmp,]
> # tmpdf$summary<-T
> # 
> # #won't show average, 
> # #since this is confounded by year
> # #so the level is very difficult to interpret
> # #it's better interpreted off the 
> # tmpdf$mu<-tmpdf$mu.min<-tmpdf$mu.max<-NA
> # #if you want to show it, comment out line above
> # 
> # 
> # #before, when estimated black/white
> # if("race2"%in%names(tmpdf)) {
> #   tmp<-names(tmpdf)=="race2"
> #   names(tmpdf)[tmp]<-"race"
> # }
> # 
> # keepvars<-c(
> #   "question",
> #   "dimension",
> #   "race",
> #   "summary",
> #   "mu",
> #   "mu.min",
> #   "mu.max"
> # )
> # avgdf$summary<-F
> # avgdf<-rbind.fill(
> #   avgdf,
> #   tmpdf[,keepvars]
> # )
> 
> 
> #DIFFERENCES
> diffdf<-read.csv(
+   '01po_q_diffs.csv',
+   stringsAsFactors=F
+ )

> #DEPRECATED
> # tmpdf<-read.csv(
> #   'q_diffhat.csv',
> #   stringsAsFactors=F
> # )
> # tmpdf$question<-paste0(
> #   tmpdf$dimension,".sum"
> # )
> # #don't need all.sum
> # tmp<-tmpdf$question=="all.sum"
> # tmpdf<-tmpdf[!tmp,]
> # 
> # keepvars<-c(
> #   "question",
> #   "dimension",
> #   "difftype",
> #   "summary",
> #   "mu",
> #   "mu.min",
> #   "mu.max",
> #   "pval.class"
> # )
> # diffdf$summary<-F
> # diffdf$difftype<-"diff"
> # tmpdf$summary<-T
> # diffdf<-rbind.fill(
> #   diffdf,
> #   tmpdf[,keepvars]
> # )
> 
> #########################################################
> #########################################################
> 
> #plotting prelims
> require(ggplot2)

> require(ggthemes)

> require(extrafont)

> require(RColorBrewer)

> require(scales)
Loading required package: scales
 

Attaching package: ‘scales’

 
The following object is masked from ‘package:readr’:

    col_factor

 

> require(grid)
Loading required package: grid
 

> # #load fonts
> # loadfonts(quiet=T) #register w/ pdf
> # loadfonts(device = "win",quiet=T) #register w/ windows
> # #fonts()
> # #get ghostscript, for tex output
> # gsdir<-file.path(
> #   "c:",
> #   "Program Files",
> #   "gs"
> # )
> # gsdir_full<-file.path(
> #   gsdir,
> #   dir(gsdir),
> #   "bin",
> #   "gswin64c.exe"
> # )
> # Sys.setenv(
> #   R_GSCMD = gsdir_full
> # )
> # #initialize graphlist
> # gs.list<-list()
> 
> #########################################################
> #########################################################
> 
> #FIGURE 1 - AVERAGE RESPONSES
> 
> tmp<-avgdf$race%in%c(1,2)

> plotdf<-avgdf[tmp,]

> #fix question
> #order by dimension,
> #whether summary,
> #then by black mu
> #and make factor 
> plotdf$dimension<-factor(
+   plotdf$dimension,
+   levels=c("anxiety","punitive","mistrust"),
+   labels=c("Anxiety","Punitiveness","Mistrust")
+ )

> roworder<-order(
+   plotdf$question,
+   plotdf$race
+ )

> plotdf<-plotdf[roworder,]

> plotdf$blackmu<-tapply(
+   plotdf$mu,
+   plotdf$question,
+   function(x) rep(x[2],2)
+ ) %>% unlist

> neworder<-order(
+   plotdf$dimension,
+   #plotdf$summary,
+   plotdf$blackmu
+ )

> tmplevels<-plotdf$question[neworder] %>%
+   unique

> tmplabels<-sapply(tmplevels,function(x) {
+   getcode(x,"question","shortname",questionsdf)
+ })

> plotdf$question<-factor(
+   plotdf$question,
+   levels=tmplevels,
+   labels=tmplabels
+ )

> # #add face
> # tmpface<-rep("plain",length(tmplabels))
> # tmp<-!str_detect(tmplabels,"\\(")
> # tmpface[tmp]<-"bold"
> 
> # #get location of dividing lines
> # tmpdf<-unique(plotdf[neworder,c("question","dimension")])
> # diffs<-tmpdf$dimension %>%
> #   as.numeric %>% diff
> # hlines<-which(diffs==1) + 0.5
> 
> #order race
> plotdf$race<-factor(
+   plotdf$race,
+   levels=c(1,2),
+   labels=c("White","Black")
+ )

> tmpcolors<-c('red','blue')

> names(tmpcolors)<-levels(plotdf$race)

> #dodge positions
> dodge<-position_dodge(.6)

> g.tmp<- ggplot(
+   plotdf,
+   aes(
+     x=question,
+     y=mu,
+     ymin=mu.min,
+     ymax=mu.max,
+     color=race
+   )
+ ) +
+   geom_errorbar(
+     width=0,
+     position=dodge
+   ) + 
+   geom_point(
+     size=1,
+     position=dodge
+   ) +
+   geom_hline(
+     yintercept=50,
+     linetype='dashed',
+     color='grey'
+   ) +
+   scale_color_manual(
+     name="",
+     values=tmpcolors,
+     guide=guide_legend(
+       direction="horizontal"
+     )
+   ) +
+   xlab("") +
+   ylab("\n% Anxious, Mistrustful or Punitive") +
+   coord_flip() +  
+   facet_grid(
+     rows=vars(dimension),
+     scales='free_y',
+     space='free_y'
+   ) +
+   theme_bw(
+     base_size=14
+   ) +
+   theme(legend.position="top") +
+   theme(axis.text.y=element_text(size=8))

> setwd(outputdir)

> ggsave(
+   plot=g.tmp,
+   filename='fig_po_averages.png',
+   width=6,
+   height=9,
+   dpi=300
+ )

> ggsave(
+   plot=g.tmp,
+   filename='CleggFig1.pdf',
+   width=6,
+   height=9,
+   dpi=300
+ )

> #########################################################
> #########################################################
> 
> #FIG X - DIFFERENCES
> 
> # #aux plot will take difftype avg
> # tmp<-diffdf$difftype=="avg"
> # auxplotdf<-diffdf[tmp,]
> # tmp<-diffdf$difftype!="avg" 
> # plotdf<-diffdf[tmp,]
> plotdf<-diffdf

> #make this black-white rather than white-black
> plotdf$mu <- plotdf$mu * -1

> plotdf$mu.min <- plotdf$mu.min * -1

> plotdf$mu.max <- plotdf$mu.max * -1

> #fix question
> #order by dimension,
> #then mu
> #and make factor 
> plotdf$dimension<-factor(
+   plotdf$dimension,
+   levels=c("anxiety","punitive","mistrust"),
+   labels=c("Anxiety","Punitiveness","Mistrust")
+ )

> neworder<-order(
+   plotdf$dimension,
+   #plotdf$summary,
+   plotdf$mu
+ )

> tmplevels<-plotdf$question[neworder] %>%
+   unique

> tmplabels<-sapply(tmplevels,function(x) {
+   getcode(x,"question","shortname",questionsdf)
+ })

> plotdf$question<-factor(
+   plotdf$question,
+   levels=tmplevels,
+   labels=tmplabels
+ )

> # #factor in auxplot, too
> # auxplotdf$question<-factor(
> #   auxplotdf$question,
> #   levels=tmplevels,
> #   labels=tmplabels
> # )
> 
> # #add face
> # tmpface<-rep("plain",length(tmplabels))
> # tmp<-!str_detect(tmplabels,"\\(")
> # tmpface[tmp]<-"bold"
> 
> # #get location of dividing lines
> # tmpdf<-unique(plotdf[neworder,c("question","dimension")])
> # diffs<-tmpdf$dimension %>%
> #   as.numeric %>% diff
> # hlines<-which(diffs==1) + 0.5
> 
> #add pval info to shape of point
> plotdf$pval.shp<-NA

> plotdf$pval.shp[plotdf$pval.class=="at alpha=0.01"]<-1

> plotdf$pval.shp[plotdf$pval.class=="at alpha=0.05"]<-2

> plotdf$pval.shp[plotdf$pval.class=="at alpha=0.10"]<-3

> plotdf$pval.shp[plotdf$pval.class=="not sig"]<-4

> plotdf$pval.shp<-factor(
+   plotdf$pval.shp,
+   levels=c(1,2,3,4),
+   labels=c("at alpha=0.01","at alpha=0.05","at alpha=0.10","not sig")
+ )

> tmpshapes<-c(8,4,16,1)

> names(tmpshapes)<-levels(plotdf$pval.shp)

> shp.labels<-c(
+   bquote(alpha == 0.01),
+   bquote(alpha == 0.05),
+   bquote(alpha == 0.10)
+ )

> g.tmp<-ggplot() + 
+   geom_errorbar(
+     data=plotdf,
+     aes(
+       x=question,
+       ymin=mu.min,
+       ymax=mu.max
+     ),
+     width=0
+   ) + 
+   geom_point(
+     data=plotdf,
+     aes(
+       x=question,
+       y=mu,
+       shape=pval.shp
+     ),
+     size=1
+   ) +
+   geom_hline(
+     yintercept=0,
+     linetype='dashed',
+     color='grey'
+   ) +
+   # geom_vline(
+   #   xintercept=hlines[1],
+   #   linetype='dashed',
+   #   alpha=0.3
+   # ) + 
+   # geom_vline(
+   #   xintercept=hlines[2],
+   #   linetype='dashed',
+   #   alpha=0.3
+   # ) +
+   scale_shape_manual(
+     name="",
+     values=tmpshapes,
+     labels=shp.labels,
+     guide=guide_legend(
+       direction="horizontal"
+     )
+   ) +
+   xlab("") +
+   ylab("\nBlack-White Gap") +
+   coord_flip() +  
+   facet_grid(
+     rows=vars(dimension),
+     scales='free_y',
+     space='free_y'
+   ) +
+   theme_bw(
+     #base_family="CM Roman",
+     base_size=14
+   ) +
+   theme(legend.position="top") + 
+   theme(axis.text.y=element_text(size=8))

> setwd(outputdir)

> ggsave(
+   plot=g.tmp,
+   filename='fig_po_diffs.png',
+   width=6,
+   height=9
+ )

> #########################################################
> #########################################################
> 
> #ROOT CAUSE RESPONSES
> 
> #extras
> require(boot)

> #load data again
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> finaldf<-fread(
+   '01po_dataframe.csv'
+ )

> # Weighted bootstrap of the mean of x.
> #   x: numeric vector of responses
> #   w: weights, one per element of x (passed to boot() as `weights`)
> #   R: number of bootstrap replicates; defaults to 1000, the count the
> #      calling code indexes against
> # Returns a plain numeric vector holding the R bootstrap means.
> mean_se.boot<-function(x,w,R=1000) {
+   
+   #df<-finaldf[question=='adeqprotect.time' & race==1]
+   #x<-df$aff
+   #w<-df$weights
+   #d holds the resampled indices; take the mean of each resample
+   dist<-boot(
+     data=x,
+     statistic=function(x,d) {
+       mean(x[d])
+     },
+     R=R,
+     weights=w
+   )
+   #boot keeps the replicates in a one-column matrix; flatten it
+   as.vector(dist$t)
+ }

> summarize.distribution3<-function(ests.distribution) {
+   # Condense a vector of simulated/bootstrapped estimates into its
+   # median, a 2.5%-97.5% interval and a coarse significance label.
+   #ests.distribution<-tmpdist
+   cutpoints<-c(0.01,0.025,0.05,0.5,0.95,0.975,0.99)
+   qs<-quantile(ests.distribution,cutpoints)
+   #point estimate is the median; interval is the central 95%
+   mu<-qs["50%"]
+   mu.min<-qs["2.5%"]
+   mu.max<-qs["97.5%"]
+   #significance label: for a non-negative median ask how deep into the
+   #lower tail the distribution stays above zero; for a negative median
+   #ask the mirror-image question of the upper tail
+   alphas<-c('at alpha=0.01','at alpha=0.05','at alpha=0.10')
+   if(mu>=0) {
+     cleared<-qs[c("1%","2.5%","5%")]>0
+   } else if(mu<0) {
+     cleared<-qs[c("99%","97.5%","95%")]<0
+   }
+   pval.class<-if(any(cleared)) alphas[which(cleared)[1]] else 'not sig'
+   #se and pval are not computed here; they are returned as NA placeholders
+   list(
+     mu=mu,
+     mu.min=mu.min,
+     mu.max=mu.max,
+     se=NA,
+     pval=NA,
+     pval.class=pval.class
+   )
+ }

> #combine the two more prisons q's
> #work on a copy: data.table's `:=` updates by reference, and a plain
> #`tmpdf <- finaldf` would rename the rows in finaldf as well
> tmpdf <- copy(finaldf)

> tmpdf[question%in%c('moreprisons.gallup','moreprisons.lat'),question:='moreprisons']

> # #summarize rootcause questions
> # sumdf <- tmpdf[
> #   !is.na(race) & 
> #     race%in%c(1,2) &
> #     neut==0 & 
> #     question%in%c(
> #       'useforce.anes',
> #       'moreprisons',
> #       'moreimppunish.gallup'
> #     )
> #   ,
> #   .(
> #     index=1:1000,
> #     mu=mean_se.boot(
> #       100 * neg,
> #       weights
> #     )
> #   )
> #   ,
> #   by=c(
> #     'race',
> #     'question',
> #     'year'
> #   )
> # ][
> #   ,
> #   summarize.distribution3(mu)
> #   ,
> #   by=c(
> #     'race',
> #     'question',
> #     'year'
> #   )
> # ]
> # sumdf[order(race,mu)]
> 
> #summarize rootcause questions:
> #bootstrap 100*neg within each race x question cell, then reduce the
> #1000 draws of each cell with summarize.distribution3
> sumdf <- tmpdf[
+   !is.na(race) & race%in%c(1,2) & neut==0 &
+     question%in%c('useforce.anes','moreprisons','moreimppunish.gallup'),
+   list(
+     index=1:1000,
+     mu=mean_se.boot(100 * neg,weights)
+   ),
+   by=.(race,question)
+ ][
+   ,
+   summarize.distribution3(mu),
+   by=.(race,question)
+ ]

> sumdf[order(race,question,mu)]
    race             question       mu   mu.min   mu.max     se   pval    pval.class
   <int>               <char>    <num>    <num>    <num> <lgcl> <lgcl>        <char>
1:     1 moreimppunish.gallup 68.56002 66.84065 70.06448     NA     NA at alpha=0.01
2:     1          moreprisons 64.39878 63.16591 65.51027     NA     NA at alpha=0.01
3:     1        useforce.anes 63.34947 62.07410 64.73655     NA     NA at alpha=0.01
4:     2 moreimppunish.gallup 56.15616 50.45045 60.66817     NA     NA at alpha=0.01
5:     2          moreprisons 68.89251 65.14658 72.64251     NA     NA at alpha=0.01
6:     2        useforce.anes 87.93103 85.27851 90.18568     NA     NA at alpha=0.01

> #list the distinct question/year pairs for the root-cause questions
> unique(
+   finaldf[
+     question%in%c(
+       'useforce.anes',
+       'moreprisons.gallup',
+       'moreimppunish.gallup',
+       'moreprisons.lat'
+     ),
+     .(question,year)
+   ]
+ )
               question  year
                 <char> <int>
1: moreimppunish.gallup  1955
2: moreimppunish.gallup  1989
3: moreimppunish.gallup  1982
4:        useforce.anes  1970
5:        useforce.anes  1992
6:        useforce.anes  1974
7:        useforce.anes  1976
8:        useforce.anes  1968
9:        useforce.anes  1972
[1] "######"
[1] "Running:"
[1] "05_regmods.R"

> #########################################################
> #########################################################
> 
> #clear workspace
> rm(list=ls())

> #load packages
> require(stringr)

> require(plyr)

> require(dplyr)

> require(zoo)

> require(data.table)

> require(tidyr)

> require(rprojroot)

> #extra
> require(data.table)

> require(lme4)
Loading required package: lme4
 
Loading required package: Matrix
 

Attaching package: ‘Matrix’

 
The following objects are masked from ‘package:tidyr’:

    expand, pack, unpack

 

> require(rstanarm)
Loading required package: rstanarm
 
Loading required package: Rcpp
 
This is rstanarm version 2.32.1
 
- See https://mc-stan.org/rstanarm/articles/priors for changes to default priors!
 
- Default priors may change, so it's safest to specify priors, even if equivalent to the defaults.
 
- For execution on a local, multicore CPU with excess RAM we recommend calling
 
  options(mc.cores = parallel::detectCores())
 

Attaching package: ‘rstanarm’

 
The following object is masked from ‘package:boot’:

    logit

 

> #set dirs
> rootdir<-find_root(
+   criterion=has_file('_rapol.Rproj')
+ )

> codedir<-file.path(rootdir,"code")

> setwd(codedir); dir()
[1] "01_publicopinion" "02_voting"        "03_dind"          "dirs.R"          
[5] "runeverything.R" 

> source('dirs.R')

> #helper functions
> setwd(pcodedir); dir()
 [1] "01_group.R"                "02_prep.R"                 "03_summarize.R"           
 [4] "04_summarize_output.R"     "05_regmods.R"              "06_regmods_output.R"      
 [7] "07_predict.R"              "08_predict_output.R"       "09_extra_output.R"        
[10] "10_RandR_summarize.R"      "11_RandR_model.R"          "12_RandR_modeloutput.R"   
[13] "functions.R"               "getcode.R"                 "getinfo2.R"               
[16] "readpoll_functions.R"      "XX_predict.R"              "XX_predict_output.R"      
[19] "XX_predict_outputTrends.R" "XX_predictscribbles.R"     "XX_regmods.R"             
[22] "XX_regmods_output.R"       "XX_runall.R"               "XX_scanmeta.R"            
[25] "XXdirs.R"                 

> source('readpoll_functions.R')

> source('getinfo2.R')

> source('functions.R')

> #set seed
> set.seed(23)

> #sample N
> setwd(filesdir)

> sample_df<-F

> # if(!"sample_N"%in%ls())
> #   sample_N<-as.numeric(readLines('sample_N.txt'))
> 
> #########################################################
> #########################################################
> 
> #load data
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> finaldf<-fread(
+   '01po_dataframe.csv'
+ )

> #########################################################
> #########################################################
> 
> #SETUP
> 
> felist<-list( #fixed-effect term sets, looked up by the `fes` column of modsdf
+   bvrt=c("RACE"), #race only
+   full=c(
+     "RACE", #placeholder; swapped for factor(race) or factor(race2) when formulas are built
+     "gender",
+     "ed",
+     "age"
+   )
+ )

> relist<-list( #grouping terms, looked up by `res`; each becomes a (1 | term) random intercept
+   simple=c(
+     "year",
+     "GEO", #placeholder; swapped for division or state_alpha2 when formulas are built
+     "question",
+     "RACEXyear"
+     #####
+   ),
+   pref=c(
+     "year",
+     "GEO",
+     "question",
+     ##### race interactions
+     "RACEXed",
+     "RACEXgender",
+     "RACEXage",
+     "RACEXyear",
+     "RACEXGEO"#,
+     #"RACEXquestion"
+   )#,
+   #full=c(
+   #  "year",
+   #  "GEO",
+   #  "question",
+     #####
+     #"RACEXgender",
+     #"RACEXage",
+     #"RACEXquestion",
+     #"RACEXGEOXyear",
+     #"RACEXedXyear"
+   #)
+ )

> #########################################################
> #########################################################
> 
> #MODS: one row per candidate model, i.e. every combination of the options below
> modsdf<-expand.grid(
+   dv=c(
+     "aff"
+   ),
+   dimension=c(
+     "all",
+     "punitive",
+     "anxiety",
+     "mistrust"
+   ),
+   fes=c(
+     #"bvrt",
+     "full"
+   ),
+   res=c(
+     "simple",
+     "pref"#,
+     #"full"
+   ),
+   race=c(
+     "race",
+     "race2"
+   ),
+   sample=c(
+     "allpolls",
+     "statepolls"
+   ),
+   stringsAsFactors=F
+ )

> #modname: the six option values of a row joined with "."
> modsdf$mname<-apply(
+   modsdf,1,paste0,collapse="."
+ )

> modsdf$sampname<-paste0(
+   modsdf$dimension,
+   ".",
+   modsdf$sample
+ )

> unique(modsdf$sampname)
[1] "all.allpolls"        "punitive.allpolls"   "anxiety.allpolls"    "mistrust.allpolls"  
[5] "all.statepolls"      "punitive.statepolls" "anxiety.statepolls"  "mistrust.statepolls"

> #trim: keep the all-polls, full-FE, `race`-coded specs and drop the
> #pooled "all" dimension
> tmp<-modsdf$sample%in%c("allpolls") &
+   modsdf$res%in%c("simple","pref","full") &
+   modsdf$fes%in%c("full") &
+   !modsdf$dimension%in%c("all") &
+   modsdf$race=="race"

> modsdf<-modsdf[tmp,]

> #running index, used later to restore this row order
> modsdf$i<-1:nrow(modsdf)

> ####
> #not setup to run race, here..
> # tmp<-modsdf$race=="race"
> # if(sum(tmp)>0)
> #   stop('cant estimate race w/o making it a factor')
> 
> #########################################################
> #########################################################
> 
> #GET FORMS
> #one model formula per row of modsdf
> tmpseq.i<-1:nrow(modsdf)

> forms<-lapply(tmpseq.i,function(i) {
+   #i<-1
+   spec<-modsdf[i,]
+   #geography grouping variable and race coding for this spec
+   geovar<-if(spec$sample=="allpolls") "division" else "state_alpha2"
+   racevar<-if(spec$race=="race") "race" else "race2"
+   #substitute the GEO / RACE placeholders in a vector of terms
+   swap<-function(terms,racetext) {
+     str_replace(str_replace(terms,"GEO",geovar),"RACE",racetext)
+   }
+   #fixed effects: race enters as a factor
+   fe.part<-paste(
+     swap(felist[[spec$fes]],paste0("factor(",racevar,")")),
+     collapse=" + "
+   )
+   #random effects: one random intercept per grouping term
+   re.part<-paste(
+     paste0("(1 | ",swap(relist[[spec$res]],racevar),")"),
+     collapse=" + "
+   )
+   #dv ~ fixed + random
+   as.formula(
+     paste0(spec$dv," ~ ",fe.part," + ",re.part)
+   )
+ })

> names(forms)<-modsdf$mname

> #make sure every variable used in a formula is a column of finaldf
> allvars<-lapply(forms,all.vars) %>%
+   unlist %>% unique

> tmp<-allvars%in%names(finaldf)

> if(sum(!tmp)>0) {
+   print(allvars[!tmp])
+   #fail with a message that names the offending columns
+   stop(
+     'formula variables missing from finaldf: ',
+     paste(allvars[!tmp],collapse=', ')
+   )
+ }

> #########################################################
> #########################################################
> 
> #SAMPS
> #each sampname in modsdf gets a row mask and a column list
> sampnames<-unique(modsdf$sampname)

> tmpseq.j<-seq_along(sampnames)

> sampspecs<-lapply(tmpseq.j,function(j) {
+   #j<-1
+   print(j)
+   #get all forms w/ this sampname
+   this.sampname<-sampnames[j]
+   tmprows<-modsdf$sampname==this.sampname
+   tmpmods<-modsdf$mname[tmprows]
+   tmpdim<-unique(modsdf$dimension[tmprows])
+   if(length(tmpdim)>1) 
+     stop('sample ',this.sampname,' maps to more than one dimension')
+   #these are the vars: everything the formulas use, plus the ids
+   mycols<-lapply(forms[tmpmods],all.vars) %>% 
+     unlist %>% unique
+   mycols<-c(
+     mycols,
+     "respid",
+     "pollid"
+   ) %>% unique
+   #these are the rows; all vars present, black/white respondents only.
+   #the race condition must be joined with `&`: written as a statement
+   #of its own it is evaluated and thrown away, leaving the mask unfiltered
+   myrows<-complete.cases(finaldf[,mycols,with=F]) &
+     finaldf$race%in%c(1,2) #only blacks/whites
+   #and, unless pooling across dimensions, rows of this dimension
+   if(tmpdim!="all") {
+     myrows<-myrows & 
+       finaldf$dimension==tmpdim
+   }  
+   #return rows/cols
+   list(
+     rows=myrows,
+     cols=mycols
+   )
+ })
[1] 1
[1] 2
[1] 3

> names(sampspecs)<-sampnames

> #size of each sample: rows, respondents, questions and polls
> sampinfodf<-lapply(tmpseq.j,function(j) {
+   spec<-sampspecs[[j]]
+   sampdf<-finaldf[spec$rows,spec$cols,with=F]
+   n.unique<-function(v) length(unique(v))
+   data.frame(
+     sampname=names(sampspecs)[j],
+     N=nrow(sampdf),
+     N.resp=n.unique(sampdf$respid),
+     N.questions=n.unique(sampdf$question),
+     N.polls=n.unique(sampdf$pollid),
+     stringsAsFactors=F
+   )
+ }) %>% rbind.fill

> #########################################################
> #########################################################
> 
> #ESTIMATE 
> 
> #DEPRECATED
> # tmpseq.i<-1:nrow(modsdf)
> # st_time<-proc.time()
> # modslist<-lapply(tmpseq.i,function(i) {
> #   #i<-1
> #   print(
> #     paste(
> #       "Estimating",i,"of",length(tmpseq.i)
> #     )
> #   )
> #   thisrow<-modsdf[i,]
> #   thisform<-forms[[i]]
> #   ss<-sampspecs[[thisrow$sampname]]
> #   thisdf<-finaldf[ss$rows,ss$cols,with=F]
> #   #####
> #   #sample to save time?
> #   if(sample_df)
> #     thisdf<-dplyr::sample_n(thisdf,sample_N)
> #   #####
> #   #fit mod/getmodtime
> #   modtime<-system.time(
> #     m<-glmer(
> #       data=thisdf,
> #       formula=thisform,
> #       family=binomial(link="logit")
> #     )
> #   )
> #   list(
> #     m=m,
> #     modtime=modtime
> #   )
> # })
> # names(modslist)<-modsdf$mname
> # duration<-proc.time() - st_time
> 
> # this is time-intensive, so
> # preferable to attempt to run this in parallel
> require(doParallel)
Loading required package: doParallel
 
Loading required package: foreach
 
Loading required package: iterators
 
Loading required package: parallel
 

> require(foreach)

> #size the worker pool from the machine: getDoParWorkers() only reports
> #the backend that is already registered and returns 1 when none is,
> #which would leave the loop below running on a single worker
> numCores<-parallel::detectCores()

> #detectCores() can return NA; fall back to one worker
> if(is.na(numCores)) numCores<-1

> #no point in starting more workers than there are models to fit
> numCores<-max(1,min(numCores,length(tmpseq.i)))

> registerDoParallel(cores=numCores)

> modslist <- foreach(i=tmpseq.i,.packages='data.table') %dopar% { #one list(m, modtime) per row of modsdf
+   #i<-1
+   print(
+     paste(
+       "Estimating",i,"of",length(tmpseq.i)
+     )
+   )
+   thisrow<-modsdf[i,]
+   thisform<-forms[[i]]
+   ss<-sampspecs[[thisrow$sampname]]
+   thisdf<-finaldf[ss$rows,ss$cols,with=F]
+   #####
+   #optionally subsample to save time; sample_N must exist when sample_df is TRUE
+   if(sample_df)
+     thisdf<-dplyr::sample_n(thisdf,sample_N)
+   #####
+   #fit the logit mixed model and record how long the fit takes
+   modtime<-system.time(
+     m<-lme4::glmer(
+       data=thisdf,
+       formula=thisform,
+       family=binomial(link="logit")
+     )
+   )
+   list(
+     m=m,
+     modtime=modtime
+   )
+ }

> names(modslist)<-modsdf$mname

> #########################################################
> #########################################################
> 
> #IDENTIFY PREFMODS BASED ON FIT
> 
> #merge sample info
> finaldf<-merge(
+   modsdf,
+   sampinfodf,
+   by="sampname"
+ )

> finaldf<-finaldf[order(finaldf$i),]

> #pick pref model
> #pull the fitted model out of each list(m, modtime) pair
> mods<-lapply(modslist,`[[`,"m")

> tmpseq.i<-seq_along(mods)

> #one row of fit statistics per model, plus any convergence messages
> fitdf<-lapply(tmpseq.i,function(i) {
+   #i<-4
+   fit.summary<-summary(mods[[i]])
+   onerow<-data.frame(
+     t(fit.summary$AICtab)
+   )
+   #several messages are joined with "/"; no messages gives ""
+   onerow$warnings<-paste0(
+     fit.summary$optinfo$conv$lme4$messages,
+     collapse="/"
+   )
+   onerow
+ }) %>% rbind.fill

> names(fitdf)<-tolower(names(fitdf)) 

> fitdf$mname<-names(mods)

> #put it together
> finaldf<-merge(
+   finaldf,
+   fitdf,
+   by="mname"
+ )

> #########################################################
> #########################################################
> 
> #obtain prefmods
> 
> #rank models within sample; flag the best-AIC model as `pref`
> finaldf<-by(finaldf,finaldf$sampname,function(df) {
+   #df<-finaldf[finaldf$sampname=="all.allpolls",]
+   #tie-break order for specs with equal fit. "simple" has to be a
+   #level, otherwise factor() overwrites those entries of `res` with NA;
+   #it is listed last, which is where the NA used to sort
+   df$fes<-factor(
+     df$fes,
+     levels=c("pref","full","bvrt")
+   )
+   df$res<-factor(
+     df$res,
+     levels=c("pref","full","simple")
+   )
+   #order() gives the permutation that sorts the rows, not each row's
+   #rank; applying order() twice turns that permutation into ranks
+   #by aic
+   df$aic.rank<-order(order(
+     df$aic,
+     df$fes,
+     df$res
+   ))
+   #by bic
+   df$bic.rank<-order(order(
+     df$bic,
+     df$fes,
+     df$res
+   ))
+   #get pref
+   df$pref<-F
+   df$pref[df$aic.rank==1]<-T
+   #return
+   df
+ }) %>% rbind.fill

> #retain original order
> finaldf<-finaldf[order(finaldf$i),]

> #########################################################
> #########################################################
> 
> #save mod and sampinfo
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> write.csv(
+   finaldf,
+   "01po_regmods_info.csv",
+   row.names=F
+ )

> #save mods/modtimes
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> saveRDS(
+   modslist,
+   "01po_modslist.RDS"
+ )

> #########################################################
> #########################################################
[1] "######"
[1] "Running:"
[1] "06_regmods_output.R"

> #########################################################
> #########################################################
> 
> #clear workspace
> rm(list=ls())

> #load packages
> require(stringr)

> require(plyr)

> require(dplyr)

> require(zoo)

> require(data.table)

> require(tidyr)

> require(rprojroot)

> #extra
> require(data.table)

> require(lme4)

> require(rstanarm)

> #set dirs
> rootdir<-find_root(
+   criterion=has_file('_rapol.Rproj')
+ )

> codedir<-file.path(rootdir,"code")

> setwd(codedir); dir()
[1] "01_publicopinion" "02_voting"        "03_dind"          "dirs.R"          
[5] "runeverything.R" 

> source('dirs.R')

> #helper functions
> setwd(pcodedir); dir()
 [1] "01_group.R"                "02_prep.R"                 "03_summarize.R"           
 [4] "04_summarize_output.R"     "05_regmods.R"              "06_regmods_output.R"      
 [7] "07_predict.R"              "08_predict_output.R"       "09_extra_output.R"        
[10] "10_RandR_summarize.R"      "11_RandR_model.R"          "12_RandR_modeloutput.R"   
[13] "functions.R"               "getcode.R"                 "getinfo2.R"               
[16] "readpoll_functions.R"      "XX_predict.R"              "XX_predict_output.R"      
[19] "XX_predict_outputTrends.R" "XX_predictscribbles.R"     "XX_regmods.R"             
[22] "XX_regmods_output.R"       "XX_runall.R"               "XX_scanmeta.R"            
[25] "XXdirs.R"                 

> source('readpoll_functions.R')

> source('getinfo2.R')

> source('functions.R')

> #load data and mods
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> finaldf<-fread('01po_dataframe.csv')

> mods<-readRDS('01po_modslist.RDS')

> #modnames<-names(modslist)
> mods<-lapply(mods,function(x) x$m)

> #set seed
> set.seed(23)

> #########################################################
> #########################################################
> 
> #plotting prelims
> require(ggplot2)

> require(ggthemes)

> require(extrafont)

> require(RColorBrewer)

> require(scales)

> # #load fonts
> # loadfonts(quiet=T) #register w/ pdf
> # loadfonts(device = "win",quiet=T) #register w/ windows
> # #fonts()
> # #get ghostscript, for tex output
> # gsdir<-file.path(
> #   "c:",
> #   "Program Files",
> #   "gs"
> # )
> # gsdir_full<-file.path(
> #   gsdir,
> #   dir(gsdir),
> #   "bin",
> #   "gswin64c.exe"
> # )
> # Sys.setenv(
> #   R_GSCMD = gsdir_full
> # )
> # #initialize graphlist
> # gs.list<-list()
> 
> #write df as a csv in outputdir; a .pdf/.png file name is turned into
> #the matching .csv name so a figure's data can sit next to the figure
> output <- function(df,tmpname) {
+   setwd(outputdir)
+   figext<-"\\.pdf$|\\.png$"
+   csvname<-if( str_detect(tmpname,figext) ) {
+     str_replace(tmpname,figext,".csv")
+   } else {
+     tmpname
+   }
+   write.csv(
+     df,
+     csvname,
+     row.names=F
+   )
+ }

> #########################################################
> #########################################################
> 
> #identify prefmods
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> regmodsdf<-read.csv(
+   '01po_regmods_info.csv',
+   stringsAsFactors=F
+ )

> #choose the pref models: per dimension, the best-BIC model among those
> #that fit without a convergence message; if none did, the `pref` model
> prefmodsdf<-by(regmodsdf,regmodsdf$dimension,function(df) {
+   #df<-regmodsdf[regmodsdf$dimension=="mistrust",]
+   #read.csv yields NA instead of "" when the whole warnings column is
+   #blank, so treat NA as "no message" too
+   tmp<-is.na(df$warnings) | df$warnings==""
+   if(sum(tmp)>0) {
+     tmpdf<-df[tmp,]
+     tmpdf[order(tmpdf$bic.rank),][1,]
+   } else {
+     df[df$pref,][1,]
+   }
+ }) %>% rbind.fill

> prefmod_names<-prefmodsdf$mname

> prefmods<-mods[prefmod_names]

> #there should be 3 of these
> if(length(prefmods)!=3)
+  stop('expected 3 preferred models, found ',length(prefmods))

> #########################################################
> #########################################################
> 
> #DEPRECATED
> 
> # #get coefs from prefmods
> # tmpseq.i<-seq_along(prefmods)
> # plotdf<-lapply(tmpseq.i,function(i) {
> #   #i<-1
> #   print(i)
> #   thismod<-prefmods[[i]]
> #   thismodname<-names(prefmods)[i]
> #   tmpsum<-summary(thismod)
> #   tmpcoefs<-tmpsum$coefficients
> #   returndf<-data.frame(
> #     mname=thismodname,
> #     var=row.names(tmpcoefs),
> #     mu=tmpcoefs[,1],
> #     mu.min=tmpcoefs[,1] - 1.96 * tmpcoefs[,2],
> #     mu.max=tmpcoefs[,1] + 1.96 * tmpcoefs[,2],
> #     pval=tmpcoefs[,4],
> #     stringsAsFactors=F
> #   )
> #   row.names(returndf)<-NULL
> #   returndf
> # }) %>% rbind.fill
> # 
> # #get dimension
> # plotdf$dimension<-str_extract(
> #   plotdf$mname,
> #   "all|anxiety|mistrust|punitive"
> # )
> # 
> # #get rid of intercept
> # tmp<-plotdf$var=="(Intercept)" |
> #   plotdf$dimension=="all"
> # plotdf<-plotdf[!tmp,]
> # 
> # #pvals
> # plotdf$pval.class<-get.pvals.class(plotdf$pval)
> # #add pval info to shape of point
> # plotdf$pval.shp<-NA
> # plotdf$pval.shp[plotdf$pval.class=="at alpha=0.01"]<-1
> # plotdf$pval.shp[plotdf$pval.class=="at alpha=0.05"]<-2
> # plotdf$pval.shp[plotdf$pval.class=="at alpha=0.10"]<-3
> # plotdf$pval.shp[plotdf$pval.class=="not sig"]<-4
> # plotdf$pval.shp<-factor(
> #   plotdf$pval.shp,
> #   levels=c(1,2,3,4),
> #   labels=c(
> #     "at alpha=0.01",
> #     "at alpha=0.05",
> #     "at alpha=0.10",
> #     "not sig"
> #   )
> # )
> # #tmpshapes
> # tmpshapes<-c(8,4,16,1)
> # names(tmpshapes)<-levels(plotdf$pval.shp)
> # shp.labels<-c(
> #   bquote(alpha == 0.01),
> #   bquote(alpha == 0.05),
> #   bquote(alpha == 0.10)
> # )
> # 
> # #divide by 4 rule
> # #(means, roughly shows marginal effect in prob)
> # plotdf$mu<-plotdf$mu/4
> # plotdf$mu.min<-plotdf$mu.min/4
> # plotdf$mu.max<-plotdf$mu.max/4
> # 
> # #factors
> # plotdf$var %>% unique
> # tmplevels<-c(
> #   "gender",
> #   "ed",
> #   "age",
> #   "factor(race)2",
> #   "factor(race)3"
> # ) %>% rev
> # tmplabels<-c(
> #   "Female",
> #   "Education",
> #   "Age",
> #   "Black",
> #   "Other"
> # ) %>% rev
> # plotdf$varname<-factor(
> #   plotdf$var,
> #   tmplevels,
> #   tmplabels
> # )
> # 
> # tmplevels<-c(
> #   "anxiety",
> #   "mistrust",
> #   "punitive"
> # ) %>% rev
> # tmplabels<-c(
> #   "Anxiety",
> #   "Mistrust",
> #   "Punitiveness"
> # ) %>% rev
> # plotdf$dimension<-factor(
> #   plotdf$dimension,
> #   tmplevels,
> #   tmplabels
> # )
> # 
> # g.tmp<-ggplot(
> #   plotdf,
> #   aes(
> #     x=varname,
> #     y=mu,
> #     ymin=mu.min,
> #     ymax=mu.max,
> #     shape=pval.shp
> #   )
> # ) + 
> #   geom_point(
> #     size=2
> #   ) +
> #   geom_errorbar(
> #     size=0.4,
> #     width=0.2
> #   ) +
> #   geom_hline(
> #     yintercept=0,
> #     linetype='dashed',
> #     color='black'
> #   ) +
> #   scale_shape_manual(
> #     name="",
> #     values=tmpshapes,
> #     labels=shp.labels,
> #     drop=F
> #   ) + 
> #   scale_color_discrete(
> #     name=""
> #   ) +
> #   ylab("\nEffect on Punitiveness, Anxiety, Mistrust ") +
> #   xlab("") +
> #   coord_flip() +
> #   facet_wrap(
> #     ~ dimension,
> #     ncol=1
> #   ) +
> #   theme_bw()
> # 
> # setwd(outputdir)
> # ggsave(
> #   plot=g.tmp,
> #   filename='fig_publicopinion_coefplots.png',
> #   width=6,
> #   height=6
> # )
> 
> #########################################################
> #########################################################
> 
> #FIG X - RACE EFFECT
> #the main thing we are interested in showing
> #is the effect of race in each of these three dimensions
> 
> predictdf<-expand.grid(
+   race2=c(1,2),
+   gender=c(1,2),
+   ed=c(1,2,3,4),
+   age=c(1,2,3,4)
+ )

> #ensure it works whether race or race2
> predictdf$race<-predictdf$race2

> #loop through the preferred models; for each dimension, simulate
> #predictions over predictdf and summarize two gaps
> tmpseq.i<-seq_along(prefmods)

> plotdf<-lapply(tmpseq.i,function(i) {
+   #i<-1
+   print(i)
+   
+   #get params
+   thismod<-prefmods[[i]]
+   thismodname<-names(prefmods)[i]
+   thisdimension<-str_extract(
+     thismodname,
+     "all|anxiety|punitive|mistrust"
+   )
+   
+   #override w/ the model for this dimension whose name contains 'pref'
+   tmp <- str_detect(
+     names(mods),
+     'pref'
+   ) &
+     str_detect(
+       names(mods),
+       thisdimension
+     )
+   if(sum(tmp)==0)
+     stop('no pref model found for dimension ',thisdimension)
+   thismod<-mods[tmp][[1]]
+   #take the name from the list of models; names() of the fitted model
+   #object itself is not the model's name
+   thismodname<-names(mods)[tmp][1]
+ 
+   #############
+   
+   #set up predictdf
+   
+   #get mrefs: reference values for the grouping variables
+   mrefs<-get.medianrefs(thismod)
+   predictdf$year<-mrefs$year
+   predictdf$question<-mrefs$question
+   predictdf$division<-mrefs$division
+   predictdf$row<-1:nrow(predictdf)
+   
+   #now, add all interactions
+   loopdf<-expand.grid(
+     v1=c(
+       "race",
+       "race2"
+     ),
+     v2=c(
+       "question",
+       "age",
+       "ed",
+       "gender",
+       "year",
+       "region",
+       "division",
+       "state_alpha2"
+     ),
+     stringsAsFactors=F
+   )
+   for(j in seq_len(nrow(loopdf))) {
+     #j<-1
+     thisrow<-loopdf[j,]
+     newname<-paste0(
+       thisrow$v1,
+       "X",
+       thisrow$v2
+     )
+     #make these all distinct categories
+     predictdf[[newname]]<-paste0(
+       predictdf[[thisrow$v1]],
+       "_",
+       predictdf[[thisrow$v2]]
+     )
+   }
+   #add others
+   predictdf$raceXdivisionXyear<-paste0(
+     predictdf$race,"_",
+     predictdf$division,"_",
+     predictdf$year
+   )
+   predictdf$raceXedXyear<-paste0(
+     predictdf$race,"_",
+     predictdf$ed,"_",
+     predictdf$year
+   )
+   
+   #get predictions w/ merTools
+   tmpoutput<-merTools::predictInterval(
+     thismod,
+     newdata=predictdf,
+     which="full",
+     level=0.95,
+     type='probability',
+     include.resid.var=F,
+     returnSims=T
+   )
+   #push the returned simulations through the model's inverse link
+   simMat<-attr(tmpoutput,"sim.results")
+   simMat<-apply(simMat,1,thismod@resp$family$linkinv)
+   simMat<-t(simMat) #* 100
+   
+   ###SUM BLACK-WHITE DIFFERENCE
+   #the two row subsets are subtracted element by element, so this
+   #relies on predictdf listing the race==2 and race==1 rows in the
+   #same order
+   tmpdist<-simMat[predictdf$race==2,] - 
+     simMat[predictdf$race==1,]
+   returndf1<-summarize.distribution2(tmpdist)
+   returndf1$dimension<-thisdimension
+   row.names(returndf1)<-NULL
+   returndf1$gap <- 'blackwhite'
+   
+   ###SUM BLACK ELITE - BLACK NONELITE DIFFERENCE
+   #race==2 rows at ed==4 minus race==2 rows at ed==1
+   tmpdist<-simMat[predictdf$race==2 & predictdf$ed==4,] - 
+     simMat[predictdf$race==2 & predictdf$ed==1,]
+   returndf2<-summarize.distribution2(tmpdist)
+   returndf2$dimension<-thisdimension
+   row.names(returndf2)<-NULL
+   returndf2$gap <- 'elitenonelite_black'
+   
+   #both gaps for this dimension
+   rbind(
+     returndf1,
+     returndf2
+   )
+   
+ }) %>% rbind.fill
[1] 1
[1] 2
[1] 3

> plotdf$gap<-factor(
+   plotdf$gap,
+   c('blackwhite','elitenonelite_black'),
+   c('Black-White Gap','Black Elite Gap')
+ )

> tmplevels<-c(
+   "anxiety",
+   "punitive",
+   "mistrust"
+ ) %>% rev

> tmplabels<-c(
+   "Anxiety",
+   "Punitiveness",
+   "Mistrust"
+ ) %>% rev

> plotdf$dimension<-factor(
+   plotdf$dimension,
+   tmplevels,
+   tmplabels
+ )

> #add pval info to shape of point: an ordered set of significance
> #classes; anything outside the four classes becomes NA
> plotdf$pval.shp<-factor(
+   plotdf$pval.class,
+   levels=c(
+     "at alpha=0.01",
+     "at alpha=0.05",
+     "at alpha=0.10",
+     "not sig"
+   )
+ )

> #tmpshapes: point shape for each significance class
> tmpshapes<-setNames(
+   c(8,4,16,1),
+   levels(plotdf$pval.shp)
+ )

> #legend labels, typeset as expressions
> shp.labels<-c(
+   bquote(alpha == 0.01),
+   bquote(alpha == 0.05),
+   bquote(alpha == 0.10)
+ )

> g.tmp <- ggplot(
+   plotdf,
+   aes(
+     x=dimension,
+     y=mu,
+     ymin=mu.min,
+     ymax=mu.max,
+     shape=pval.shp
+   )
+ ) + 
+   geom_point(
+     size=2
+   ) +
+   geom_errorbar(
+     size=0.4,
+     width=0
+   ) +
+   geom_hline(
+     yintercept=0,
+     linetype='dashed',
+     color='black'
+   ) +
+   scale_shape_manual(
+     name="",
+     values=tmpshapes,
+     labels=shp.labels,
+     drop=F
+   ) + 
+   scale_color_discrete(
+     name=""
+   ) +
+   ylab("\nEstimated Gap") +
+   xlab("") +
+   coord_flip() +
+   facet_grid(
+      ~ gap
+   ) +
+   theme_bw()

> setwd(outputdir)

> ggsave(
+   plot=g.tmp,
+   filename='fig_po_effects.png',
+   width=8,
+   height=4
+ )

> output(plotdf,'fig_po_effects.png')
[1] "######"
[1] "Running:"
[1] "07_predict.R"

> #########################################################
> #########################################################
> 
> #clear workspace
> rm(list=ls())

> #load packages
> require(stringr)

> require(plyr)

> require(dplyr)

> require(zoo)

> require(data.table)

> require(tidyr)

> require(rprojroot)

> #extra
> require(lme4)

> require(merTools)
Loading required package: merTools
 
Loading required package: arm
 

arm (Version 1.14-4, built: 2024-4-1)

 
Working directory is C:/Users/adane/Dropbox/data/_rapol/code/01_publicopinion

 

Attaching package: ‘arm’

 
The following objects are masked from ‘package:rstanarm’:

    invlogit, logit

 
The following object is masked from ‘package:scales’:

    rescale

 
The following object is masked from ‘package:xtable’:

    display

 
The following object is masked from ‘package:boot’:

    logit

 

> require(doParallel)

> #set dirs
> rootdir<-find_root(
+   criterion=has_file('_rapol.Rproj')
+ )

> codedir<-file.path(rootdir,"code")

> setwd(codedir); dir()
[1] "01_publicopinion" "02_voting"        "03_dind"          "dirs.R"          
[5] "runeverything.R" 

> source('dirs.R')

> #helper functions
> setwd(pcodedir); dir()
 [1] "01_group.R"                "02_prep.R"                 "03_summarize.R"           
 [4] "04_summarize_output.R"     "05_regmods.R"              "06_regmods_output.R"      
 [7] "07_predict.R"              "08_predict_output.R"       "09_extra_output.R"        
[10] "10_RandR_summarize.R"      "11_RandR_model.R"          "12_RandR_modeloutput.R"   
[13] "functions.R"               "getcode.R"                 "getinfo2.R"               
[16] "readpoll_functions.R"      "XX_predict.R"              "XX_predict_output.R"      
[19] "XX_predict_outputTrends.R" "XX_predictscribbles.R"     "XX_regmods.R"             
[22] "XX_regmods_output.R"       "XX_runall.R"               "XX_scanmeta.R"            
[25] "XXdirs.R"                 

> source('readpoll_functions.R')

> source('getinfo2.R')

> source('functions.R')

> #load data and mods
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> #finaldf<-fread('finaldf_po.csv')
> mods<-readRDS('01po_modslist.RDS')

> modtimes<-lapply(mods,function(x) x$modtime['elapsed']/60)

> mods<-lapply(mods,function(x) x$m)

> lapply(mods,summary)
$aff.punitive.full.simple.race.allpolls
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
 Family: binomial  ( logit )
Formula: aff ~ factor(race) + gender + ed + age + (1 | year) + (1 | division) +  
    (1 | question) + (1 | raceXyear)
   Data: thisdf

      AIC       BIC    logLik  deviance  df.resid 
 287482.8  287586.6 -143731.4  287462.8    236399 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.9480 -0.9804  0.5210  0.7277  3.6403 

Random effects:
 Groups    Name        Variance Std.Dev.
 raceXyear (Intercept) 0.02576  0.16050 
 year      (Intercept) 0.15357  0.39187 
 question  (Intercept) 0.55536  0.74522 
 division  (Intercept) 0.00723  0.08503 
Number of obs: 236409, groups:  raceXyear, 147; year, 50; question, 16; division, 9

Fixed effects:
               Estimate Std. Error z value Pr(>|z|)    
(Intercept)    0.723448   0.200084   3.616    3e-04 ***
factor(race)2 -0.848663   0.036400 -23.315   <2e-16 ***
factor(race)3 -0.409174   0.044259  -9.245   <2e-16 ***
gender        -0.311396   0.009082 -34.287   <2e-16 ***
ed            -0.082672   0.004556 -18.145   <2e-16 ***
age            0.040435   0.004589   8.810   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr) fct()2 fct()3 gender ed    
factor(rc)2 -0.081                            
factor(rc)3 -0.067  0.358                     
gender      -0.071 -0.004  0.009              
ed          -0.070  0.045  0.022  0.045       
age         -0.066  0.029  0.045 -0.017  0.173

$aff.anxiety.full.simple.race.allpolls
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
 Family: binomial  ( logit )
Formula: aff ~ factor(race) + gender + ed + age + (1 | year) + (1 | division) +  
    (1 | question) + (1 | raceXyear)
   Data: thisdf

     AIC      BIC   logLik deviance df.resid 
152175.2 152271.8 -76077.6 152155.2   116461 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.3277 -1.1235  0.6555  0.7957  1.7443 

Random effects:
 Groups    Name        Variance Std.Dev.
 raceXyear (Intercept) 0.016005 0.12651 
 year      (Intercept) 0.034436 0.18557 
 question  (Intercept) 0.140730 0.37514 
 division  (Intercept) 0.009243 0.09614 
Number of obs: 116471, groups:  raceXyear, 105; year, 35; question, 9; division, 9

Fixed effects:
               Estimate Std. Error z value Pr(>|z|)    
(Intercept)    0.200292   0.138812   1.443 0.149047    
factor(race)2  0.356871   0.037075   9.626  < 2e-16 ***
factor(race)3  0.059268   0.045230   1.310 0.190077    
gender         0.198791   0.012285  16.181  < 2e-16 ***
ed            -0.082159   0.006212 -13.227  < 2e-16 ***
age           -0.022674   0.006129  -3.699 0.000216 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr) fct()2 fct()3 gender ed    
factor(rc)2 -0.107                            
factor(rc)3 -0.093  0.317                     
gender      -0.138 -0.013  0.006              
ed          -0.137  0.055  0.033  0.041       
age         -0.123  0.038  0.055 -0.025  0.162

$aff.mistrust.full.simple.race.allpolls
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
 Family: binomial  ( logit )
Formula: aff ~ factor(race) + gender + ed + age + (1 | year) + (1 | division) +  
    (1 | question) + (1 | raceXyear)
   Data: thisdf

     AIC      BIC   logLik deviance df.resid 
105015.0 105110.7 -52497.5 104995.0   106047 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.4885 -0.6832 -0.2644  0.6167  7.8654 

Random effects:
 Groups    Name        Variance Std.Dev.
 raceXyear (Intercept) 0.062177 0.24935 
 year      (Intercept) 0.178591 0.42260 
 question  (Intercept) 1.202744 1.09670 
 division  (Intercept) 0.004767 0.06904 
Number of obs: 106057, groups:  raceXyear, 115; year, 39; question, 11; division, 9

Fixed effects:
               Estimate Std. Error z value Pr(>|z|)    
(Intercept)   -0.734936   0.334684  -2.196  0.02810 *  
factor(race)2  0.608239   0.064150   9.481  < 2e-16 ***
factor(race)3  0.208841   0.072790   2.869  0.00412 ** 
gender        -0.004316   0.015223  -0.283  0.77680    
ed            -0.061371   0.007935  -7.735 1.04e-14 ***
age           -0.098865   0.007777 -12.712  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr) fct()2 fct()3 gender ed    
factor(rc)2 -0.081                            
factor(rc)3 -0.070  0.378                     
gender      -0.064 -0.012  0.007              
ed          -0.067  0.037  0.019  0.041       
age         -0.060  0.034  0.049 -0.053  0.117

$aff.punitive.full.pref.race.allpolls
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
 Family: binomial  ( logit )
Formula: aff ~ factor(race) + gender + ed + age + (1 | year) + (1 | division) +  
    (1 | question) + (1 | raceXed) + (1 | raceXgender) + (1 |  
    raceXage) + (1 | raceXyear) + (1 | raceXdivision)
   Data: thisdf

      AIC       BIC    logLik  deviance  df.resid 
 285942.9  286088.1 -142957.5  285914.9    236395 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.2598 -0.9644  0.5112  0.7261  3.4841 

Random effects:
 Groups        Name        Variance  Std.Dev.
 raceXyear     (Intercept) 0.0280198 0.16739 
 year          (Intercept) 0.1391736 0.37306 
 raceXdivision (Intercept) 0.0106649 0.10327 
 question      (Intercept) 0.5656735 0.75211 
 raceXage      (Intercept) 0.0042802 0.06542 
 raceXed       (Intercept) 0.0283023 0.16823 
 division      (Intercept) 0.0001557 0.01248 
 raceXgender   (Intercept) 0.0057053 0.07553 
Number of obs: 236409, groups:  
raceXyear, 147; year, 50; raceXdivision, 27; question, 16; raceXage, 12; raceXed, 12; division, 9; raceXgender, 6

Fixed effects:
              Estimate Std. Error z value Pr(>|z|)    
(Intercept)    0.36791    0.26679   1.379 0.167884    
factor(race)2 -0.79389    0.15616  -5.084  3.7e-07 ***
factor(race)3 -0.34899    0.15905  -2.194 0.028222 *  
gender        -0.22107    0.06395  -3.457 0.000546 ***
ed            -0.03071    0.04393  -0.699 0.484577    
age            0.06440    0.01915   3.363 0.000772 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr) fct()2 fct()3 gender ed    
factor(rc)2 -0.280                            
factor(rc)3 -0.279  0.468                     
gender      -0.348 -0.006 -0.004              
ed          -0.399  0.002  0.001 -0.005       
age         -0.175  0.005  0.026 -0.004  0.005

$aff.anxiety.full.pref.race.allpolls
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
 Family: binomial  ( logit )
Formula: aff ~ factor(race) + gender + ed + age + (1 | year) + (1 | division) +  
    (1 | question) + (1 | raceXed) + (1 | raceXgender) + (1 |  
    raceXage) + (1 | raceXyear) + (1 | raceXdivision)
   Data: thisdf

     AIC      BIC   logLik deviance df.resid 
151678.3 151813.6 -75825.1 151650.3   116457 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.4179 -1.1156  0.6442  0.7935  1.9308 

Random effects:
 Groups        Name        Variance Std.Dev.
 raceXyear     (Intercept) 0.014662 0.12109 
 year          (Intercept) 0.037424 0.19345 
 raceXdivision (Intercept) 0.007446 0.08629 
 raceXage      (Intercept) 0.004061 0.06373 
 raceXed       (Intercept) 0.021744 0.14746 
 question      (Intercept) 0.142464 0.37744 
 division      (Intercept) 0.003323 0.05765 
 raceXgender   (Intercept) 0.001485 0.03853 
Number of obs: 116471, groups:  
raceXyear, 105; year, 35; raceXdivision, 27; raceXage, 12; raceXed, 12; question, 9; division, 9; raceXgender, 6

Fixed effects:
               Estimate Std. Error z value Pr(>|z|)    
(Intercept)    0.003502   0.202642   0.017 0.986214    
factor(race)2  0.441761   0.132624   3.331 0.000866 ***
factor(race)3  0.080132   0.135412   0.592 0.554007    
gender         0.172152   0.036854   4.671    3e-06 ***
ed            -0.027186   0.039360  -0.691 0.489759    
age            0.007015   0.019472   0.360 0.718658    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr) fct()2 fct()3 gender ed    
factor(rc)2 -0.321                            
factor(rc)3 -0.316  0.473                     
gender      -0.273 -0.006 -0.001              
ed          -0.488  0.009  0.002  0.002       
age         -0.245  0.013  0.031 -0.002  0.014

$aff.mistrust.full.pref.race.allpolls
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
 Family: binomial  ( logit )
Formula: aff ~ factor(race) + gender + ed + age + (1 | year) + (1 | division) +  
    (1 | question) + (1 | raceXed) + (1 | raceXgender) + (1 |  
    raceXage) + (1 | raceXyear) + (1 | raceXdivision)
   Data: thisdf

     AIC      BIC   logLik deviance df.resid 
104807.7 104941.7 -52389.8 104779.7   106043 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-4.0200 -0.6728 -0.2614  0.6102  8.4690 

Random effects:
 Groups        Name        Variance  Std.Dev. 
 raceXyear     (Intercept) 6.455e-02 2.541e-01
 year          (Intercept) 1.556e-01 3.945e-01
 raceXdivision (Intercept) 1.550e-02 1.245e-01
 raceXage      (Intercept) 4.725e-03 6.874e-02
 raceXed       (Intercept) 1.458e-02 1.207e-01
 question      (Intercept) 1.209e+00 1.100e+00
 division      (Intercept) 4.091e-03 6.396e-02
 raceXgender   (Intercept) 7.983e-10 2.825e-05
Number of obs: 106057, groups:  
raceXyear, 115; year, 39; raceXdivision, 27; raceXage, 12; raceXed, 12; question, 11; division, 9; raceXgender, 6

Fixed effects:
               Estimate Std. Error z value Pr(>|z|)    
(Intercept)   -0.908769   0.364471  -2.493   0.0127 *  
factor(race)2  0.692173   0.133678   5.178 2.24e-07 ***
factor(race)3  0.235080   0.138271   1.700   0.0891 .  
gender        -0.006747   0.015267  -0.442   0.6585    
ed             0.003828   0.034184   0.112   0.9108    
age           -0.096724   0.022181  -4.361 1.30e-05 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr) fct()2 fct()3 gender ed    
factor(rc)2 -0.178                            
factor(rc)3 -0.167  0.454                     
gender      -0.061 -0.005  0.002              
ed          -0.236  0.009 -0.009  0.003       
age         -0.155  0.017  0.039 -0.019  0.021
optimizer (Nelder_Mead) convergence code: 0 (OK)
boundary (singular) fit: see help('isSingular')



> #set seed
> set.seed(23)

> merTools_sims<-100

> #########################################################
> #########################################################
> 
> #USE PREFMODS
> #for this exercise, select prefmods
> #based on the fit exercise in regmods
> #and/or models that did didn't converge
> 
> #identify prefmods
> setwd(filesdir)

> regmodsdf<-read.csv(
+   '01po_regmods_info.csv',
+   stringsAsFactors=F
+ )

> # #which mods have problesm
> # goodmods<-sapply(mods,function(x) {
> #   #x<-mods[[5]]
> #   xsum<-summary(x)
> #   tmp<-xsum$optinfo$conv$lme4$messages
> #   ifelse(is.null(tmp),T,F)
> # }) %>% unname
> # names(mods)[!goodmods]
> # names(mods)[goodmods]
> 
> #choose the pref models
> prefmodsdf<-by(regmodsdf,regmodsdf$dimension,function(df) {
+   #df<-regmodsdf[regmodsdf$dimension=="punitive",]
+   tmp<-df$warnings==""
+   if(sum(tmp)>0) {
+     tmpdf<-df[tmp,]
+     tmpdf[order(tmpdf$bic.rank),][1,]
+   } else {
+     df[df$pref,][1,]
+   }
+ }) %>% rbind.fill

> prefmod_names<-prefmodsdf$mname

> prefmods<-mods[prefmod_names]

> #any mods w/ issues, we discard
> #after making a note that they're discarded
> goodmods<-sapply(prefmods,function(x) {
+   #x<-mods[[5]]
+   xsum<-summary(x)
+   tmp<-xsum$optinfo$conv$lme4$messages
+   ifelse(is.null(tmp),T,F)
+ }) %>% unname

> badmods<-prefmods[!goodmods]

> prefmods<-prefmods[goodmods]

> #########################################################
> #########################################################
> 
> #PREDICT FOR MRP-CELLS
> #post-stratifying using census data
> 
> #load the census data
> setwd(datadir); dir()
 [1] "abcdfs.csv"                                 "All_Bills(1947-2017).csv"                  
 [3] "american_violence_data_20241016_201046.csv" "anesdf.csv"                                
 [5] "beodf.csv"                                  "beodf5.csv"                                
 [7] "billsdf_revised.csv"                        "cbcdf.csv"                                 
 [9] "cbsdfs.csv"                                 "citecount.csv"                             
[11] "congressmen_list.csv"                       "conventional.bib"                          
[13] "cpopdf.csv"                                 "Ethnic_Collect_Action.dta"                 
[15] "fulldf_bills.csv"                           "gallupdfs.csv"                             
[17] "gssdf.csv"                                  "Hall_votes.csv"                            
[19] "histpundf_national_220328.csv"              "housevotes_handcoded_revised.csv"          
[21] "HSall_members.csv"                          "HSall_rollcalls_withissues.csv"            
[23] "incrates_subnationalstate.csv"              "lbj1964.csv"                               
[25] "nbclatdfs.csv"                              "prezdf.csv"                                
[27] "questions_fortex.txt"                       "race_riot.dta"                             
[29] "redf.csv"                                   "roperdfs.csv"                              
[31] "tab_longviolence.csv"                       "timedfs.csv"                               
[33] "votes"                                     

> cpopdf<-read.csv(
+   "cpopdf.csv",
+   stringsAsFactors = F
+ )

> cpopdf$X<-NULL

> #match our vars
> names(cpopdf)<-str_replace(
+   names(cpopdf),
+   "\\.mrp$",""
+ )

> #make sure all vars are in?
> tmpdf<-prefmods[[1]]@frame

> tmpnames<-str_replace(names(tmpdf),"factor\\(","") %>%
+   str_replace("\\)$","") 

> tmpnames<-sapply(tmpnames,function(x) str_split(x,"X")[[1]]) %>%
+   unlist %>% unique

> tmp<-tmpnames%in%names(cpopdf)

> tmp<-tmp | tmpnames%in%c("aff","question")

> prez<-tmpnames[tmp]

> miss<-tmpnames[!tmp]

> if(length(miss)!=0)
+   stop('missing vars')

> #prep cpopdf for prediction
> #now, add dem-based interactions
> loopdf<-expand.grid(
+   v1=c(
+     "race",
+     "race2"
+   ),
+   v2=c(
+     names(cpopdf)[!names(cpopdf)%in%c("race","race2")]
+   ),
+   stringsAsFactors=F
+ )

> tmpseq.i<-1:nrow(loopdf)

> for(i in tmpseq.i) {
+   #print(i)
+   #i<-1
+   thisrow<-loopdf[i,]
+   newname<-paste0(
+     thisrow$v1,
+     "X",
+     thisrow$v2
+   )
+   #make these all distinct categories
+   cpopdf[[newname]]<-paste0(
+     cpopdf[[thisrow$v1]],
+     "_",
+     cpopdf[[thisrow$v2]]
+   )
+ }

> #add edXyear and divisionXyear
> cpopdf$divisionXyear<-paste0(
+   cpopdf$division,"_",cpopdf$year
+ )

> cpopdf$edXyear<-paste0(
+   cpopdf$ed,"_",cpopdf$year
+ )

> #add others that are missing
> cpopdf$raceXdivisionXyear<-paste0(
+   cpopdf$race,"_",
+   cpopdf$division,"_",
+   cpopdf$year
+ )

> cpopdf$raceXedXyear<-paste0(
+   cpopdf$race,"_",
+   cpopdf$ed,"_",
+   cpopdf$year
+ )

> #loop through and predictdf
> tmpseq.i<-seq_along(prefmods)

> fulloutput<-lapply(tmpseq.i,function(i) {
+   
+   #i<-1
+   print(i)
+   ##########
+   
+   thismod<-prefmods[[i]]
+   thismodname<-names(prefmods)[i]
+   thisdimension<-str_extract(
+     thismodname,
+     "all|anxiety|punitive|mistrust"
+   )
+   predictdf<-cpopdf
+   
+   #restrict it for temp speed
+   # tmp<-predictdf$year%in%c(1960,1990) & 
+   #   predictdf$state_alpha2=="AL"
+   # predictdf<-predictdf[tmp,]
+   
+   ######
+   
+   #FIX NEW LEVELS ISSUE
+   
+   #loop through all ranef levels
+   #make sure they don't appear in the data
+   ranefs<-names(ranef(thismod))
+   ranefs<-ranefs[ranefs%in%names(predictdf)]
+   tmpseq.j<-seq_along(ranefs)
+   logicals<-lapply(tmpseq.j,function(j) {
+     ##
+     #j<-1
+     ranefs[j]
+     modlevels<-row.names(
+       ranef(thismod)[[ranefs[j]]]
+     )
+     predlevels<-predictdf[[ranefs[j]]] %>%
+       unique
+     outlevels<-predlevels[!predlevels%in%modlevels]
+     ##rows to discard
+     predictdf[[ranefs[[j]]]]%in%outlevels
+   })
+   badrows<-Reduce(f="|",logicals)
+   predictdf<-predictdf[!badrows,]
+   
+   #add missing refs to rpedictdf
+   medrefs<-get.medianrefs(thismod)
+   ranefs<-names(ranef(thismod))
+   tmp<-ranefs%in%names(predictdf) |
+     str_detect(ranefs,"X")
+   ranefs_miss<-ranefs[!tmp]
+   medrefs_miss<-medrefs[ranefs_miss]
+   tmpseq.i<-seq_along(medrefs_miss)
+   for(i in tmpseq.i) {
+     newvar<-names(medrefs_miss)[i]
+     newval<-medrefs_miss[[i]]
+     predictdf[[newvar]]<-newval
+   }
+   
+   #predict!
+   predictdf$dimension<-thisdimension
+   predictdf$raceXquestion<-paste0(
+     predictdf$race,"_",
+     predictdf$question
+   )
+   
+   #single prediction
+   spredictdf<-predictdf
+   spredictdf$mu<-predict(
+     thismod,
+     newdata=spredictdf,
+     type='response' 
+   ) 
+   spredictdf<-data.table(spredictdf)
+   #collapse to get desired summaries
+   #original spredictdf is also saved
+   spredictdf_ry<-spredictdf[
+   ,
+   .(
+     mu=weighted.mean(mu,pop)
+   )
+   ,
+   by=c(
+     'race',
+     'year'
+   )
+   ]
+   
+   #NB: this is very computationally-intensive,
+   #so we use parallel computation to speed it up a bit
+   mpredictdf<-predictdf
+   numCores <- parallel::detectCores() - 1  
+   registerDoParallel(cores = numCores)
+   st <- proc.time()
+   tmpoutput <- merTools::predictInterval(
+     thismod,
+     newdata = mpredictdf,
+     which = "full",
+     level = 0.95,
+     type = "probability",
+     include.resid.var = FALSE,
+     n.sims = merTools_sims,
+     .parallel = TRUE,  # Enable parallel processing if supported
+     returnSims = TRUE
+   )
+   timetorun <- proc.time() - st
+   print(timetorun)
+   stopImplicitCluster()
+ 
+   simMat<-attr(tmpoutput,"sim.results")
+   simMat<-apply(simMat,1,thismod@resp$family$linkinv)
+   simMat<-t(simMat)
+   tmpdf<-data.frame(simMat)
+   mpredictdf<-cbind(mpredictdf,tmpdf)
+   gathcols<-names(mpredictdf)[str_detect(names(mpredictdf),"^X")]
+   mpredictdf<-gather_(
+     mpredictdf,
+     "rep",
+     "mu",
+     gathcols
+   ) %>% data.table
+   mpredictdf[,rep:=str_replace(rep,"X","")]
+   #collapse for race year
+   mpredictdf_ry<-mpredictdf[
+     ,
+     .(
+       mu=weighted.mean(mu,pop)
+     )
+     ,
+     by=c(
+       'race',
+       'year',
+       'rep'
+     )
+     ]
+   #collapse for raceXedXdivisionXyear
+   mpredictdf_redy<-mpredictdf[
+     ,
+     .(
+       mu=weighted.mean(mu,pop)
+     )
+     ,
+     by=c(
+       'race',
+       'ed',
+       'division',
+       'year',
+       'rep'
+     )
+     ]
+   
+   #returnme
+   returnlist<-list(
+     spredictdf=spredictdf,
+     spredictdf_ry=spredictdf_ry,
+     mpredictdf_ry=mpredictdf_ry,
+     mpredictdf_redy=mpredictdf_redy,
+     timetorun=timetorun #how long it takes to run each
+   )
+   
+ })
[1] 1
   user  system elapsed 
  54.53    3.98   80.55 
`gather_()` was deprecated in tidyr 1.2.0. Please use `gather()` instead. 
`gather_()` was deprecated in tidyr 1.2.0. Please use `gather()` instead. 
[1] 2
   user  system elapsed 
  31.91    2.31   48.04 
`gather_()` was deprecated in tidyr 1.2.0. Please use `gather()` instead. 
`gather_()` was deprecated in tidyr 1.2.0. Please use `gather()` instead. 
[1] 3
   user  system elapsed 
  89.86    7.03  120.09 
`gather_()` was deprecated in tidyr 1.2.0. Please use `gather()` instead. 
`gather_()` was deprecated in tidyr 1.2.0. Please use `gather()` instead. 

> names(fulloutput)<-names(prefmods)

> #########################################################
> #########################################################
> 
> #save out
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> saveRDS(fulloutput,'01po_predictions.RDS')

> #########################################################
> #########################################################
> 
[1] "######"
[1] "Running:"
[1] "08_predict_output.R"

> #########################################################
> #########################################################
> 
> #clear workspace
> rm(list=ls())

> #load packages
> require(stringr)

> require(plyr)

> require(dplyr)

> require(zoo)

> require(data.table)

> require(tidyr)

> require(rprojroot)

> #extra
> require(lme4)

> require(merTools)

> require(doParallel)

> #set dirs
> rootdir<-find_root(
+   criterion=has_file('_rapol.Rproj')
+ )

> codedir<-file.path(rootdir,"code")

> setwd(codedir); dir()
[1] "01_publicopinion" "02_voting"        "03_dind"          "dirs.R"          
[5] "runeverything.R" 

> source('dirs.R')

> #helper functions
> setwd(pcodedir); dir()
 [1] "01_group.R"                "02_prep.R"                 "03_summarize.R"           
 [4] "04_summarize_output.R"     "05_regmods.R"              "06_regmods_output.R"      
 [7] "07_predict.R"              "08_predict_output.R"       "09_extra_output.R"        
[10] "10_RandR_summarize.R"      "11_RandR_model.R"          "12_RandR_modeloutput.R"   
[13] "functions.R"               "getcode.R"                 "getinfo2.R"               
[16] "readpoll_functions.R"      "XX_predict.R"              "XX_predict_output.R"      
[19] "XX_predict_outputTrends.R" "XX_predictscribbles.R"     "XX_regmods.R"             
[22] "XX_regmods_output.R"       "XX_runall.R"               "XX_scanmeta.R"            
[25] "XXdirs.R"                 

> source('readpoll_functions.R')

> source('getinfo2.R')

> source('functions.R')

> #load data and mods
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> fulloutput<-readRDS('01po_predictions.RDS')

> #set seed
> set.seed(23)

> #########################################################
> #########################################################
> 
> #plotting prelims
> require(ggplot2)

> require(ggthemes)

> require(extrafont)

> require(RColorBrewer)

> # #load fonts
> # loadfonts(quiet=T) #register w/ pdf
> # loadfonts(quiet=T,device = "win") #register w/ windows
> # fonts()
> # #get ghostscript, for tex output
> # gsdir<-file.path(
> #   "c:",
> #   "Program Files",
> #   "gs"
> # )
> # gsdir_full<-file.path(
> #   gsdir,
> #   dir(gsdir),
> #   "bin",
> #   "gswin64c.exe"
> # )
> # Sys.setenv(
> #   R_GSCMD = gsdir_full
> # )
> # #initialize graphlist
> # gs.list<-list()
> 
> #quick function to outputdfs
> output <- function(df,tmpname) {
+   setwd(outputdir)
+   if( str_detect(tmpname,"\\.pdf$|\\.png") ) 
+     tmpname<-str_replace(tmpname,"\\.pdf$|\\.png$",".csv")
+   write.csv(
+     df,
+     tmpname,
+     row.names=F
+   )
+ }

> #########################################################
> #########################################################
> 
> #take predictions
> #generate trends w/o SE
> #diff between black and white over time
> #diff between peak and troughs, by race
> #extras:
> #diff between black high ed black low ed
> #diff between white south white non south
> 
> #########################################################
> #########################################################
> 
> #I. TRENDS OVER TIME
> #use spredictdfs to trends
> 
> #loop through fulloutput
> #to generate trends for each model
> tmpseq.i<-seq_along(fulloutput)

> trendsdf<-lapply(tmpseq.i,function(i) {
+   
+   #i<-1
+   tmpoutput<-fulloutput[[i]]
+   thisdimension<-str_extract(
+     names(fulloutput)[i],
+     "all|anxiety|punitive|mistrust"
+   )
+   
+   #use spredictdf
+   spredictdf<-tmpoutput$spredictdf %>% 
+     data.table
+   tmpdf<-spredictdf[
+     ,
+     .(
+       mu=weighted.mean(mu,pop)
+     )
+     ,
+     by=c(
+       'race',
+       'year'
+     )
+     ]
+ 
+   #add a loess
+   returndf<-tmpdf[
+     ,
+     .(
+       mu.loess = loess(
+         mu ~ year
+       ) %>% predict(min(year):max(year)),
+       year=min(year):max(year)
+     )
+     ,
+     by=c(
+       'race'
+     )
+     ]
+   
+   #put them together
+   returndf<-merge(
+     tmpdf,
+     returndf,
+     all=T
+   )
+   returndf$dimension<-thisdimension
+   
+   ###
+   returndf
+ 
+ }) %>% rbind.fill %>% data.table

> tmp<-trendsdf$race%in%c(1,2)

> plotdf<-trendsdf[tmp,]

> #add conventional expectations to this graph
> #this is how to generate the points
> plotdf$facet<-'Estimated'

> loopdf<-expand.grid(
+   race=c(1,2),
+   dimension=c(
+     'punitive',
+     'anxiety',
+     'mistrust'
+   )
+ )

> # #quick function to help generate conventional expectations
> # setwd(codedir); source('genconventional.R')
> 
> # tmpseq.i<-1:nrow(loopdf)
> # tmpdf<-lapply(tmpseq.i,function(i) {
> #   thisrow<-loopdf[i,]
> #   if(thisrow$race==2) {
> #     m<-(thisrow$endpoint-thisrow$startpoint)/(2014-1955)
> #     b<-thisrow$startpoint - (1955 * m)
> #     fun.y<-function(x) {
> #       m * x + b
> #     }
> #     yhat<-sapply(
> #       1955:2014,
> #       fun.y
> #     )
> #     data.frame(
> #       state_alpha2="all",
> #       race=thisrow$race,
> #       dimension=thisrow$dimension,
> #       year=1955:2014,
> #       mu=yhat,
> #       stringsAsFactors=F
> #     )
> #   } else {
> #     if(thisrow$dimension%in%c('punitive','anxiety')) {
> #       data.frame(
> #         state_alpha2="all",
> #         race=thisrow$race,
> #         dimension=thisrow$dimension,
> #         year=1955:2014,
> #         mu=genconventional(40,65,70)$mu/100,
> #         stringsAsFactors=F
> #       )
> #     } else {
> #       data.frame(
> #         state_alpha2="all",
> #         race=thisrow$race,
> #         dimension=thisrow$dimension,
> #         year=1955:2014,
> #         mu=genconventional(50,35,30)$mu/100,
> #         stringsAsFactors=F
> #       )
> #     }
> #   }
> # }) %>% rbind.fill
> # tmpdf$facet<-'Conventional'
> # tmpdf$mu.loess<-tmpdf$mu
> 
> # plotdf<-rbind.fill(
> #   plotdf,
> #   tmpdf
> # )
> 
> #order race
> plotdf$race<-factor(
+   plotdf$race,
+   levels=c(1,2),
+   labels=c("White","Black")
+ )

> tmpcolors<-c('red','blue')

> names(tmpcolors)<-levels(plotdf$race)

> #dimension
> plotdf$dimension<-factor(
+   plotdf$dimension,
+   levels=c(
+     "anxiety",
+     "punitive",
+     "mistrust"
+   ),
+   labels=c(
+     "Anxiety",
+     "Punitiveness",
+     "Mistrust"
+   )
+ )

> plotdf$facet<-factor(
+   plotdf$facet,
+   levels=c(
+     'Conventional',
+     'Estimated'
+   ),
+   labels=c(
+     'Conventional',
+     'Estimated'
+   )
+ )

> #trends figure: one panel per dimension, one color per race;
> #line = loess-smoothed estimate (mu.loess), points = raw yearly estimate (mu)
> g.tmp<-ggplot(
+   plotdf[plotdf$facet=='Estimated',],
+   aes(
+     x=year,
+     y=mu.loess,
+     group=race,
+     color=race
+   )
+ ) +
+   #line thickness is set with linewidth; size= is deprecated for
+   #line geoms since ggplot2 3.4.0
+   geom_line(
+     linewidth=1
+   ) +
+   #points inherit the plot data (the same 'Estimated' subset);
+   #years with no raw estimate have mu=NA and are dropped with a message
+   geom_point(
+     aes(
+       y=mu
+     ),
+     alpha=0.25
+   ) +
+   scale_color_manual(
+     name="",
+     values=tmpcolors
+   ) +
+   facet_grid( 
+     dimension ~ .
+   ) +
+   xlab("") +
+   ylab("p(Affirmative)\n") +
+   theme_bw() +
+   theme(
+     legend.position='top',
+     legend.direction='horizontal'
+   ) 

> setwd(outputdir)

> tmpname<-"fig_po_trends.png"

> ggsave(
+   plot=g.tmp,
+   filename=tmpname,
+   width=4,
+   height=8,
+   dpi=300
+ )
Removed 61 rows containing missing values or values outside the scale range (`geom_point()`). 

> output(plotdf,tmpname)

> ggsave(
+   plot=g.tmp,
+   filename='CleggFig2.pdf',
+   width=4,
+   height=8,
+   dpi=300
+ )
Removed 61 rows containing missing values or values outside the scale range (`geom_point()`). 

> #key points
> plotdf[race=='Black' & dimension=='Punitiveness' & year%in%c(1957,1989)]
     race  year        mu  mu.loess    dimension     facet
   <fctr> <int>     <num>     <num>       <fctr>    <fctr>
1:  Black  1957 0.2691317 0.3021080 Punitiveness Estimated
2:  Black  1989 0.5791502 0.4523328 Punitiveness Estimated

> plotdf[race=='Black' & dimension=='Anxiety']
      race  year        mu  mu.loess dimension     facet
    <fctr> <int>     <num>     <num>    <fctr>    <fctr>
 1:  Black  1971 0.5124335 0.5333255   Anxiety Estimated
 2:  Black  1972        NA 0.5532744   Anxiety Estimated
 3:  Black  1973 0.5775846 0.5714568   Anxiety Estimated
 4:  Black  1974 0.6201672 0.5877633   Anxiety Estimated
 5:  Black  1975 0.5999615 0.6020846   Anxiety Estimated
 6:  Black  1976 0.5743464 0.6143113   Anxiety Estimated
 7:  Black  1977 0.6011014 0.6240817   Anxiety Estimated
 8:  Black  1978 0.6225512 0.6315557   Anxiety Estimated
 9:  Black  1979 0.6866959 0.6374061   Anxiety Estimated
10:  Black  1980 0.6963870 0.6423058   Anxiety Estimated
11:  Black  1981 0.7255625 0.6446606   Anxiety Estimated
12:  Black  1982 0.6793427 0.6435233   Anxiety Estimated
13:  Black  1983 0.6778342 0.6405368   Anxiety Estimated
14:  Black  1984 0.5724290 0.6373441   Anxiety Estimated
15:  Black  1985 0.5910239 0.6355880   Anxiety Estimated
16:  Black  1986 0.6090510 0.6345184   Anxiety Estimated
17:  Black  1987 0.5725761 0.6328637   Anxiety Estimated
18:  Black  1988 0.6199003 0.6314844   Anxiety Estimated
19:  Black  1989 0.7090420 0.6312415   Anxiety Estimated
20:  Black  1990 0.5942750 0.6333606   Anxiety Estimated
21:  Black  1991 0.6283796 0.6374946   Anxiety Estimated
22:  Black  1992 0.6853393 0.6421448   Anxiety Estimated
23:  Black  1993 0.6463908 0.6458122   Anxiety Estimated
24:  Black  1994 0.7017600 0.6469981   Anxiety Estimated
25:  Black  1995 0.6862680 0.6465594   Anxiety Estimated
26:  Black  1996 0.6076724 0.6461301   Anxiety Estimated
27:  Black  1997 0.6499118 0.6453774   Anxiety Estimated
28:  Black  1998 0.6396198 0.6439687   Anxiety Estimated
29:  Black  1999        NA 0.6417791   Anxiety Estimated
30:  Black  2000 0.6120358 0.6389758   Anxiety Estimated
31:  Black  2001        NA 0.6356650   Anxiety Estimated
32:  Black  2002 0.5789886 0.6319522   Anxiety Estimated
33:  Black  2003        NA 0.6279435   Anxiety Estimated
34:  Black  2004 0.6291796 0.6237445   Anxiety Estimated
35:  Black  2005        NA 0.6194612   Anxiety Estimated
36:  Black  2006 0.6063009 0.6151994   Anxiety Estimated
37:  Black  2007        NA 0.6108780   Anxiety Estimated
38:  Black  2008 0.6694170 0.6063480   Anxiety Estimated
39:  Black  2009        NA 0.6016134   Anxiety Estimated
40:  Black  2010 0.5534279 0.5966779   Anxiety Estimated
41:  Black  2011        NA 0.5915456   Anxiety Estimated
42:  Black  2012 0.5897752 0.5862203   Anxiety Estimated
43:  Black  2013        NA 0.5807059   Anxiety Estimated
44:  Black  2014 0.5792857 0.5750063   Anxiety Estimated
      race  year        mu  mu.loess dimension     facet

> which.max(plotdf$mu.loess[plotdf$race=='White' & plotdf$dimension=='Anxiety'])
[1] 23

> #########################################################
> #########################################################
> 
> #II. MORE POWERFUL ESTIMATES OF DIFFERENCES
> #use mpredictdfs for ests
> #of diff by race across years
> #and diff by years, by race (each year relative to the first year)
> 
> tmpseq.i<-seq_along(fulloutput)

> diffoutput<-lapply(tmpseq.i,function(i) {
+   #for model i: stack raw and loess-smoothed per-replicate predictions, then summarize two contrasts across replicates
+   #i<-1
+   mpredictdf<-fulloutput[[i]]$mpredictdf_ry %>%
+     data.table
+   mpredictdf$type<-"raw"
+   thisdimension<-str_extract(
+     names(fulloutput)[i],
+     "all|anxiety|punitive|mistrust"
+   )
+ 
+   #use loess smooth to get ests: within each race x rep, fit mu ~ year and predict every integer year from first to last
+   mpredictdf2<-mpredictdf[
+     ,
+     .(
+       mu = loess(
+         mu ~ year
+       ) %>% predict(min(year):max(year)),
+       year=min(year):max(year)
+     )
+     ,
+     by=c(
+       'race',
+       'rep'
+     )
+     ]
+   mpredictdf2$type<-"loess"
+   mpredictdf<-rbind.fill(
+     mpredictdf,
+     mpredictdf2
+   ) %>% data.table
+   #mpredictdf now holds both versions, told apart by type ("raw" / "loess")
+   #compare white to black, by time: per row, race `2` minus race `1` (labelled Black and White where race is made a factor in this script); then median and 2.5/97.5 percentiles by year and type
+   tmpdf<-spread(
+     mpredictdf,
+     race,
+     mu
+   ) %>% data.table
+   tmpdf$mu<-tmpdf$`2` - tmpdf$`1`
+   bwdf<-tmpdf[
+     ,
+     .(
+       mu=quantile(mu,0.5,na.rm=T),
+       mu.min=quantile(mu,0.025,na.rm=T),
+       mu.max=quantile(mu,0.975,na.rm=T)
+     ),
+     by=c(
+       'year',
+       'type'
+     )
+     ]
+   bwdf$dimension<-thisdimension
+   
+   #minyear below is the smallest column name that parses as a number; race/rep/type coerce to NA, which is what raises the "NAs introduced by coercion" warning
+   #change over time, by race: NOTE this is not peak-to-trough - every other year column is differenced against the earliest year column (minyear)
+   tmpdf<-spread(
+     mpredictdf,
+     year,
+     mu
+   ) %>% data.table
+   minyear<-min(as.numeric(names(tmpdf)),na.rm=T)
+   gathcols<-names(tmpdf)[!names(tmpdf)%in%c('race','rep','type',minyear)]
+   tmpdf<-gather_(
+     tmpdf,
+     "year",
+     "val",
+     gathcols
+   ) %>% data.table
+   tmpdf$mu <- tmpdf$val - tmpdf[[as.character(minyear)]]
+   yeardf<-tmpdf[
+     ,
+     .(
+       mu=quantile(mu,0.5,na.rm=T),
+       mu.min=quantile(mu,0.025,na.rm=T),
+       mu.max=quantile(mu,0.975,na.rm=T)
+     ),
+     by=c(
+       'race',
+       'year',
+       'type'
+     )
+     ]
+   yeardf$year<-as.numeric(yeardf$year)
+   yeardf$dimension<-thisdimension
+   
+   ###return both summaries for this model: bwdf (race gap by year) and yeardf (change since minyear, by race)
+   diffoutput<-list(
+     bwdf=bwdf,
+     yeardf=yeardf
+   )
+   diffoutput
+   
+ })
NAs introduced by coercion 
`gather_()` was deprecated in tidyr 1.2.0. Please use `gather()` instead. 
`gather_()` was deprecated in tidyr 1.2.0. Please use `gather()` instead. 
NAs introduced by coercion 
`gather_()` was deprecated in tidyr 1.2.0. Please use `gather()` instead. 
`gather_()` was deprecated in tidyr 1.2.0. Please use `gather()` instead. 
NAs introduced by coercion 
`gather_()` was deprecated in tidyr 1.2.0. Please use `gather()` instead. 
`gather_()` was deprecated in tidyr 1.2.0. Please use `gather()` instead. 

> #########################################################
> 
> #FIG X's - DIFFERENCES
> #plot bw difference
> plotdf<-lapply(diffoutput,function(x) x$bwdf) %>% rbind.fill

> plotdf<-plotdf[plotdf$type=='loess',]

> #dimension
> plotdf$dimension<-factor(
+   plotdf$dimension,
+   levels=c(
+     "anxiety",
+     "punitive",
+     "mistrust"
+   ),
+   labels=c(
+     "Anxiety",
+     "Punitiveness",
+     "Mistrust"
+   )
+ )

> #gap figure: median race gap by year with its 2.5-97.5 percentile band,
> #one panel per dimension; the dashed line marks a gap of zero
> g.tmp<-ggplot(
+   plotdf,
+   aes(
+     x=year,
+     y=mu,
+     ymin=mu.min,
+     ymax=mu.max
+   )
+ ) +
+   #line thickness is set with linewidth; size= is deprecated for
+   #line geoms since ggplot2 3.4.0
+   geom_line(
+     linewidth=1
+   ) + 
+   geom_ribbon(
+     alpha=0.25
+   ) +
+   geom_hline(
+     yintercept=0,
+     linetype='dashed'
+   ) +
+   facet_wrap(
+     ~ dimension,
+     ncol=1
+   ) +
+   xlab("") + 
+   ylab("Black-White Gap\n") +
+   theme_bw(  )

> setwd(outputdir)

> tmpname<-"fig_po_trends_blackwhitegap.png"

> ggsave(
+   plot=g.tmp,
+   filename=tmpname,
+   width=5,
+   height=8
+ )

> output(plotdf,tmpname)

> #plot difference by year
> plotdf<-lapply(diffoutput,function(x) x$yeardf) %>% rbind.fill

> plotdf<-plotdf[plotdf$type=="loess" & plotdf$race%in%c(1,2),]

> plotdf$race<-factor(
+   plotdf$race,
+   levels=c(1,2),
+   labels=c("White","Black")
+ )

> tmpcolors<-c('red','blue')

> names(tmpcolors)<-levels(plotdf$race)

> plotdf$dimension<-factor(
+   plotdf$dimension,
+   levels=c(
+     "punitive",
+     "anxiety",
+     "mistrust"
+   ),
+   labels=c(
+     "Punitiveness",
+     "Anxiety",
+     "Mistrust"
+   )
+ )

> #over-time change figure: median change with its 2.5-97.5 percentile
> #band, dimensions in rows and race in columns; dashed line = no change
> g.tmp<-ggplot(
+   plotdf,
+   aes(
+     x=year,
+     y=mu,
+     ymin=mu.min,
+     ymax=mu.max,
+     color=race,
+     fill=race
+   )
+ ) +
+   #line thickness is set with linewidth; size= is deprecated for
+   #line geoms since ggplot2 3.4.0
+   geom_line(
+     linewidth=1
+   ) + 
+   geom_ribbon(
+     alpha=0.25
+   ) +
+   geom_hline(
+     yintercept=0,
+     linetype='dashed'
+   ) +
+   #both legends are switched off: race is already shown by the facet columns
+   scale_color_manual(
+     guide='none',
+     values=tmpcolors
+   ) +
+   scale_fill_manual(
+     guide='none',
+     values=tmpcolors
+   ) +
+   facet_grid(
+     dimension ~ race
+   ) +
+   xlab("") + 
+   ylab("Over-Time Change\n") +
+   theme_bw( ) +
+   theme(
+     legend.position='bottom',
+     legend.direction='horizontal'
+   )

> setwd(outputdir)

> tmpname<-"fig_po_trends_byrace.png"

> ggsave(
+   plot=g.tmp,
+   filename=tmpname,
+   width=10,
+   height=8
+ )

> output(plotdf,tmpname)

> #########################################################
> #########################################################
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
[1] "######"
[1] "Running:"
[1] "09_extra_output.R"

> #########################################################
> #########################################################
> 
> #clear workspace
> rm(list=ls())

> #load packages
> require(stringr)

> require(plyr)

> require(dplyr)

> require(zoo)

> require(data.table)

> require(tidyr)

> require(rprojroot)

> #extra
> require(lme4)

> require(merTools)

> require(doParallel)

> #set dirs
> rootdir<-find_root(
+   criterion=has_file('_rapol.Rproj')
+ )

> codedir<-file.path(rootdir,"code")

> setwd(codedir); dir()
[1] "01_publicopinion" "02_voting"        "03_dind"          "dirs.R"          
[5] "runeverything.R" 

> source('dirs.R')

> #helper functions
> setwd(pcodedir); dir()
 [1] "01_group.R"                "02_prep.R"                 "03_summarize.R"           
 [4] "04_summarize_output.R"     "05_regmods.R"              "06_regmods_output.R"      
 [7] "07_predict.R"              "08_predict_output.R"       "09_extra_output.R"        
[10] "10_RandR_summarize.R"      "11_RandR_model.R"          "12_RandR_modeloutput.R"   
[13] "functions.R"               "getcode.R"                 "getinfo2.R"               
[16] "readpoll_functions.R"      "XX_predict.R"              "XX_predict_output.R"      
[19] "XX_predict_outputTrends.R" "XX_predictscribbles.R"     "XX_regmods.R"             
[22] "XX_regmods_output.R"       "XX_runall.R"               "XX_scanmeta.R"            
[25] "XXdirs.R"                 

> source('readpoll_functions.R')

> source('getinfo2.R')

> source('functions.R')

> #set seed
> set.seed(23)

> #########################################################
> #########################################################
> 
> #quick function to output dfs: write the data behind a figure as a csv
> #  df: data.frame (or data.table) to write
> #  tmpname: file name; a trailing .pdf or .png is swapped for .csv,
> #    any other name is used unchanged
> #side effect: sets the working directory to outputdir (read from the
> #calling environment); the file is written there
> output <- function(df,tmpname) {
+   setwd(outputdir)
+   #a single anchored pattern does the check and the swap; the old guard
+   #tested an unanchored "\\.png" but replaced only "\\.png$"
+   tmpname<-sub("\\.(pdf|png)$",".csv",tmpname)
+   write.csv(
+     df,
+     tmpname,
+     row.names=FALSE
+   )
+ }

> require(ggplot2)

> #########################################################
> #########################################################
> 
> #LOAD RELEVANT DATA
> 
> #load public opinion data
> setwd(outputdir)

> pundf<-read.csv(
+   'fig_po_trends.csv',
+   stringsAsFactors=F
+ )

> #load crime data
> setwd(datadir)

> vdf<-read.csv(
+   'tab_longviolence.csv'
+ )

> #load race riots and protests data
> setwd(datadir)

> riotsdf<-haven::read_dta(
+   'race_riot.dta'
+ )

> riotsdf<-by(riotsdf,riotsdf$styr,function(df) {
+   #one output row per value of styr: the number of events with race1==1
+   #(assumes styr is a two-digit 19xx year, hence the "19" prefix - confirm)
+   #df<-riotsdf[riotsdf$styr==56,]
+   data.frame(
+     var='riots',
+     #count matches with sum(): nrow(df[cond,]) returns an extra all-NA
+     #row for every NA in cond, which would inflate the count
+     val=sum(df$race1==1,na.rm=TRUE),
+     year=paste0("19",unique(df$styr)) %>% as.numeric
+   )
+ }) %>% rbind.fill

> setwd(datadir)

> protestdf<-haven::read_dta(
+   'Ethnic_Collect_Action.dta'
+ )

> protestdf<-by(protestdf,protestdf$styr,function(df) {
+   #one output row per value of styr: the number of events with race1==1
+   #(assumes styr is a two-digit 19xx year, hence the "19" prefix - confirm)
+   #df<-protestdf[protestdf$styr==56,]
+   data.frame(
+     var='protest',
+     #count matches with sum(): nrow(df[cond,]) returns an extra all-NA
+     #row for every NA in cond, which would inflate the count
+     val=sum(df$race1==1,na.rm=TRUE),
+     year=paste0("19",unique(df$styr)) %>% as.numeric
+   )
+ }) %>% rbind.fill

> #load Turchin's dataset
> setwd(datadir); dir()
 [1] "abcdfs.csv"                                 "All_Bills(1947-2017).csv"                  
 [3] "american_violence_data_20241016_201046.csv" "anesdf.csv"                                
 [5] "beodf.csv"                                  "beodf5.csv"                                
 [7] "billsdf_revised.csv"                        "cbcdf.csv"                                 
 [9] "cbsdfs.csv"                                 "citecount.csv"                             
[11] "congressmen_list.csv"                       "conventional.bib"                          
[13] "cpopdf.csv"                                 "Ethnic_Collect_Action.dta"                 
[15] "fulldf_bills.csv"                           "gallupdfs.csv"                             
[17] "gssdf.csv"                                  "Hall_votes.csv"                            
[19] "histpundf_national_220328.csv"              "housevotes_handcoded_revised.csv"          
[21] "HSall_members.csv"                          "HSall_rollcalls_withissues.csv"            
[23] "incrates_subnationalstate.csv"              "lbj1964.csv"                               
[25] "nbclatdfs.csv"                              "prezdf.csv"                                
[27] "questions_fortex.txt"                       "race_riot.dta"                             
[29] "redf.csv"                                   "roperdfs.csv"                              
[31] "tab_longviolence.csv"                       "timedfs.csv"                               
[33] "votes"                                     

> tdf <- fread(
+   'american_violence_data_20241016_201046.csv'
+ ) 

> tdf$year<-lubridate::year(tdf$date)

> tdf <- tdf[
+   year%in%c(1950:2023) &
+     str_detect(subtypes,'race|ethnic')
+   ,
+   .(
+     var='turchin',
+     val=.N
+   )
+   ,
+   by=c('year')
+ ]

> #put this in rate basis by dividing by population
> tmpvars<-c("year","var","val")

> protestdf<-rbind.fill(
+   riotsdf[,tmpvars],
+   protestdf[,tmpvars],
+   tdf[,tmpvars,with=F]
+ )

> setwd(datadir); dir()
 [1] "abcdfs.csv"                                 "All_Bills(1947-2017).csv"                  
 [3] "american_violence_data_20241016_201046.csv" "anesdf.csv"                                
 [5] "beodf.csv"                                  "beodf5.csv"                                
 [7] "billsdf_revised.csv"                        "cbcdf.csv"                                 
 [9] "cbsdfs.csv"                                 "citecount.csv"                             
[11] "congressmen_list.csv"                       "conventional.bib"                          
[13] "cpopdf.csv"                                 "Ethnic_Collect_Action.dta"                 
[15] "fulldf_bills.csv"                           "gallupdfs.csv"                             
[17] "gssdf.csv"                                  "Hall_votes.csv"                            
[19] "histpundf_national_220328.csv"              "housevotes_handcoded_revised.csv"          
[21] "HSall_members.csv"                          "HSall_rollcalls_withissues.csv"            
[23] "incrates_subnationalstate.csv"              "lbj1964.csv"                               
[25] "nbclatdfs.csv"                              "prezdf.csv"                                
[27] "questions_fortex.txt"                       "race_riot.dta"                             
[29] "redf.csv"                                   "roperdfs.csv"                              
[31] "tab_longviolence.csv"                       "timedfs.csv"                               
[33] "votes"                                     

> popdf<-read.csv(
+   'incrates_subnationalstate.csv',
+   stringsAsFactors=F
+ )

> tmp<-popdf$statename=="United States" &
+   !is.na(popdf$statename)

> popdf<-popdf[tmp,c("year","population_census")]

> protestdf<-merge(
+   protestdf,
+   popdf
+ )

> protestdf$val<-10^7*protestdf$val/protestdf$population_census

> protestdf$population_census<-NULL

> #########################################################
> #########################################################
> 
> #PLOT PROTESTS AND RIOTS
> 
> plotdf<-protestdf

> tmplevels<-c(
+   "riots",
+   "protest",
+   "turchin"
+ )

> tmplabels<-c(
+   "Riots (Olzak)",
+   "Protests (Olzak)",
+   "Political Violence (Turchin)"
+ )

> plotdf$var<-factor(
+   plotdf$var,
+   tmplevels,
+   tmplabels
+ )

> tmpcolors_riot<-c(
+   'grey',
+   'darkgrey',
+   "black"
+ )

> names(tmpcolors_riot)<-
+   levels(plotdf$var)

> #protest/riot figure: yearly event rate (per 10 million people),
> #one line per source
> g.tmp<-ggplot(
+   plotdf,
+   aes(
+     x=year,
+     y=val,
+     color=var,
+     group=var
+   )
+ ) +
+   #line thickness is set with linewidth; size= is deprecated for
+   #line geoms since ggplot2 3.4.0
+   geom_line(
+     linewidth=2
+   ) +
+   scale_color_manual(
+     name="",
+     values=tmpcolors_riot
+   ) +
+   xlab("") +
+   ylab("Events per 10 Million People\n") +
+   theme_bw() +
+   theme(
+     legend.position='bottom',
+     legend.direction='horizontal'
+   )

> tmpname<-"fig_po_protests.png"

> setwd(outputdir)

> ggsave(
+   plot=g.tmp,
+   filename=tmpname,
+   width=8,
+   height=6
+ )

> output(plotdf,tmpname)

> #########################################################
> #########################################################
> 
> #FIG - PLOT SHAPE OF PROTESTS/RIOTS
> 
> #plot protests/riots/crime and white public opinion
> #this supports the point made in the correlations above
> #the shape of this curve mirrors the crime rate, not protests..
> 
> setwd(outputdir)

> pundf<-read.csv(
+   'fig_po_trends.csv',
+   stringsAsFactors=F
+ )

> pundf$var<-paste0(
+   pundf$race,"_",pundf$facet,"_",pundf$dimension
+ ) %>% tolower

> tmprows<-str_detect(pundf$var,"estimated") &
+   pundf$dimension!="mistrust"

> pundf$val<-pundf$mu

> tmpcols<-c("year","var","val")

> pundf<-pundf[tmprows,tmpcols]

> setwd(datadir)

> vdf<-read.csv(
+   'tab_longviolence.csv'
+ )

> #merge
> plotdf<-rbind.fill(
+   protestdf,
+   pundf,
+   vdf
+ )

> #common years
> tmp<-plotdf$year%in%1950:2014 &
+   !is.na(plotdf$val)

> plotdf<-plotdf[tmp,]

> #standardize over these common years
> plotdf<-by(plotdf,plotdf$var,function(df) {
+   df$val<-scale(df$val)
+   df
+ }) %>% rbind.fill

> unique(plotdf$var)
 [1] "black_estimated_anxiety"      "black_estimated_mistrust"    
 [3] "black_estimated_punitiveness" "cenhom"                      
 [5] "fbihom"                       "fbipcrt"                     
 [7] "fbivcrt"                      "protest"                     
 [9] "riots"                        "turchin"                     
[11] "white_estimated_anxiety"      "white_estimated_mistrust"    
[13] "white_estimated_punitiveness"

> tmp<-plotdf$var%in%c(
+   "protest",
+   "riots",
+   "turchin",
+   "white_estimated_punitiveness",
+   "black_estimated_punitiveness",
+   "fbivcrt",
+   "fbihom"
+ )

> plotdf<-plotdf[tmp,]

> tmplevels<-c(
+   "white_estimated_punitiveness",
+   "black_estimated_punitiveness",
+   "protest",
+   "riots",
+   "turchin",
+   "fbivcrt",
+   "fbihom"
+ )

> tmplabels<-c(
+   "White Punitiveness",
+   "Black Punitiveness",
+   "Protests (Olzak)",
+   "Riots (Olzak)",
+   "Political Violence (Turchin)",
+   "Violent Crime",
+   "Homicide"
+ )

> plotdf$var<-factor(
+   plotdf$var,
+   tmplevels,
+   tmplabels
+ )

> tmpcolors<-c(
+   'red',
+   'blue',
+   'grey',
+   'darkgrey',
+   "black",
+   'blue',
+   'darkblue'
+ )

> names(tmpcolors)<-levels(plotdf$var)

> #split into two facets
> tmp<-plotdf$var%in%c('Protests (Olzak)','Riots (Olzak)','Political Violence (Turchin)')

> plotdf$facet[tmp]<-'Protests'

> plotdf$facet[!tmp]<-'Crime'

> tmp<-plotdf$var%in%c('White Punitiveness','Black Punitiveness')

> plotdf$facet <- factor(plotdf$facet,c('Protests','Crime'))

> plotdf$facet[tmp]<-NA

> tmpdf<-plotdf[is.na(plotdf$facet),]; tmpdf$facet<-NULL

> #fig 4: smoothed, standardized protest and crime series in two panels,
> #with the smoothed White punitiveness series overlaid on both
> g.tmp<- ggplot(
+   plotdf[!is.na(plotdf$facet),],
+   aes(
+     x=year,
+     y=val,
+     group=var,
+     color=var
+   )
+ ) +
+   #line thickness is set with linewidth; size= is deprecated for
+   #line geoms since ggplot2 3.4.0
+   stat_smooth(
+     geom="line",
+     linewidth=1, 
+     se=FALSE,
+     alpha=0.5
+   ) +
+   facet_wrap(
+     ~ facet 
+   ) +
+   #tmpdf has no facet column, so this layer is repeated in every panel
+   geom_smooth(
+     data=tmpdf[tmpdf$var!='Black Punitiveness',],
+     se=FALSE,
+     linewidth=1.5
+   ) +
+   #limits fixes the legend order and leaves Black Punitiveness out
+   scale_color_manual(
+     name="",
+     values=tmpcolors,
+     limits = c(
+       #'Black Punitiveness',
+       'White Punitiveness',
+       'Protests (Olzak)',
+       'Riots (Olzak)',
+       'Political Violence (Turchin)',
+       'Violent Crime',
+       'Homicide'
+     )
+   ) +
+   xlab("") +
+   ylab("Normalized Level\n") +
+   theme_bw() +
+   theme(
+     legend.position='bottom',
+     legend.direction='horizontal',
+     legend.text = element_text(size=8)
+   )

> tmpname<-"fig4_po_correlations.png"

> setwd(outputdir)

> ggsave(
+   plot=g.tmp,
+   filename=tmpname,
+   width=10,
+   height=6,
+   dpi=300
+ )
`geom_smooth()` using method = 'loess' and formula = 'y ~ x' 
`geom_smooth()` using method = 'loess' and formula = 'y ~ x' 

> output(plotdf,tmpname)

> ggsave(
+   plot=g.tmp,
+   filename='CleggFig4.pdf',
+   width=10,
+   height=6,
+   dpi=300
+ )
`geom_smooth()` using method = 'loess' and formula = 'y ~ x' 
`geom_smooth()` using method = 'loess' and formula = 'y ~ x' 

> #how long is lag between protests/riots peak and punitiveness peak: print the year in which each standardized series is highest
> #NOTE(review): sapply() runs over every column of tmpdf, year included, so the first value is just the latest year; the rest follow tmpdf's column order (presumably the factor levels of var - confirm)
> plotdf$facet<-NULL

> tmpdf<-spread(plotdf,var,val)

> tmpdf$year[sapply(tmpdf,which.max)]
[1] 2014 1994 1989 1965 1967 1968 1991 1980
[1] "######"
[1] "Running:"
[1] "10_RandR_summarize.R"

> #########################################################
> #########################################################
> 
> #clear workspace
> rm(list=ls())

> #load packages
> require(stringr)

> require(plyr)

> require(dplyr)

> require(zoo)

> require(data.table)

> require(tidyr)

> require(rprojroot)

> require(ggplot2)

> #extras
> require(boot)

> #set dirs
> rootdir<-find_root(
+   criterion=has_file('_rapol.Rproj')
+ )

> codedir<-file.path(rootdir,"code")

> setwd(codedir); dir()
[1] "01_publicopinion" "02_voting"        "03_dind"          "dirs.R"          
[5] "runeverything.R" 

> source('dirs.R')

> #helper functions
> setwd(pcodedir); dir()
 [1] "01_group.R"                "02_prep.R"                 "03_summarize.R"           
 [4] "04_summarize_output.R"     "05_regmods.R"              "06_regmods_output.R"      
 [7] "07_predict.R"              "08_predict_output.R"       "09_extra_output.R"        
[10] "10_RandR_summarize.R"      "11_RandR_model.R"          "12_RandR_modeloutput.R"   
[13] "functions.R"               "getcode.R"                 "getinfo2.R"               
[16] "readpoll_functions.R"      "XX_predict.R"              "XX_predict_output.R"      
[19] "XX_predict_outputTrends.R" "XX_predictscribbles.R"     "XX_regmods.R"             
[22] "XX_regmods_output.R"       "XX_runall.R"               "XX_scanmeta.R"            
[25] "XXdirs.R"                 

> source('readpoll_functions.R')

> source('getinfo2.R')

> source('functions.R')

> #load data
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> finaldf<-fread(
+   '01po_dataframe.csv'
+ )

> #########################################################
> #########################################################
> 
> #HELPER FUNCTION
> 
> #bootstrap distribution of a mean, for use inside data.table j
> #  x: numeric vector of observations
> #  w: weights, one per element of x; handed to boot() as its
> #     resampling (importance) weights
> #  R: number of bootstrap replicates (default 1000)
> #returns a plain numeric vector of length R holding the replicate
> #means; callers summarize it themselves (e.g. with quantiles)
> mean_se.boot<-function(x,w,R=1000) {
+   
+   #to step through by hand:
+   #df<-finaldf[question=='adeqprotect.time' & race==1]
+   #x<-df$aff
+   #w<-df$weights
+   bootout<-boot(
+     data=x,
+     #argument name spelled out; stat= only worked via partial matching
+     statistic=function(x,d) {
+       mean(x[d])
+     },
+     R=R,
+     weights=w
+   )
+   #t is an R x 1 matrix of replicate means; flatten it
+   as.vector(bootout$t)
+ }

> summarize.distribution3<-function(ests.distribution) {
+   #summarize a vector of draws (e.g. bootstrap replicates):
+   #  mu / mu.min / mu.max = median and the 2.5% / 97.5% quantiles
+   #  pval.class = strictest level at which the matching tail clears zero
+   #  se and pval are placeholders, always NA
+   #ests.distribution<-tmpdist
+   cutpoints<-c(0.01,0.025,0.05,0.5,0.95,0.975,0.99)
+   q<-quantile(ests.distribution,cutpoints)
+   mu<-q["50%"]
+   #for a non-negative median the lower tail has to sit above zero,
+   #for a negative median the upper tail has to sit below zero;
+   #entries are ordered from the strictest level to the loosest
+   if(mu>=0) {
+     clears<-q[c("1%","2.5%","5%")]>0
+   } else {
+     clears<-q[c("99%","97.5%","95%")]<0
+   }
+   siglabels<-c('at alpha=0.01','at alpha=0.05','at alpha=0.10')
+   #the first level that clears zero wins
+   if(any(clears)) {
+     pval.class<-siglabels[which(clears)[1]]
+   } else {
+     pval.class<-'not sig'
+   }
+   #return me
+   list(
+     mu=mu,
+     mu.min=q["2.5%"],
+     mu.max=q["97.5%"],
+     se=NA,
+     pval=NA,
+     pval.class=pval.class
+   )
+ }

> #########################################################
> #########################################################
> 
> ####---- R and R comments ----
> 
> #in response to R+R:
> #unlike in the main analysis, 
> #we want to *include* neutrals
> 
> #unlike in the main analysis, 
> #we also want to see it by poll
> 
> #########################################################
> #########################################################
> 
> #### ----- INCLUDE NEUTRALS ----
> 
> #include neutrals, otherwise same analysis:
> #for each dimension x question x race cell (race 1 or 2 only), bootstrap the mean of 100*aff, resampling with weights
> #index numbers the draws, so its length must equal the number of replicates mean_se.boot returns (R=1000 there)
> rawdf <- finaldf[
+   !is.na(race) & 
+     race%in%c(1,2)
+   ,
+   .(
+     index=1:1000,
+     mu=mean_se.boot(
+       100 * aff,
+       weights
+     )
+   )
+   ,
+   by=c(
+     'dimension',
+     'question',
+     'race'
+   )
+ ]

> #AVERAGES
> sumdf <- rawdf[
+   ,
+   summarize.distribution3(mu)
+   ,
+   by=c(
+     'dimension',
+     'question',
+     'race'
+   )
+ ]; sumdf$neutrals <- 'Neutrals Included'

> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> sumdf2<-fread(
+   '01po_q_avgs.csv'
+ )

> sumdf2 <- sumdf2[race%in%c(1,2),]

> sumdf2$neutrals <- 'Neutrals Excluded'

> #DIFFS
> tmpdf <- pivot_wider(
+   rawdf,
+   names_from=race,
+   values_from=mu
+ ) %>% data.table

> tmpdf$mu <-  tmpdf$`1` - tmpdf$`2`

> diffdf <- tmpdf[
+   ,
+   summarize.distribution3(mu)
+   ,
+   by=c(
+     'dimension',
+     'question'
+   )
+ ]; diffdf$neutrals <- 'Neutrals Included'

> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> diffdf2<-read.csv(
+   '01po_q_diffs.csv',
+   stringsAsFactors=F
+ ); diffdf2$neutrals <- 'Neutrals Excluded'

> #########################################################
> 
> #PLOT AVERAGES
> 
> #put averages together
> plotdf <- rbindlist(
+   list(
+     sumdf,
+     sumdf2
+   ),
+   fill=T
+ )

> #fix question
> #order by dimension,
> #whether summary,
> #then by black mu
> #and make factor 
> plotdf$dimension<-factor(
+   plotdf$dimension,
+   levels=c("anxiety","mistrust","punitive"),
+   labels=c("Anxiety","Mistrust","Punitiveness")
+ )

> roworder<-order(
+   plotdf$question,
+   plotdf$race
+ )

> plotdf<-plotdf[roworder,]

> plotdf$blackmu<-tapply(
+   plotdf$mu[plotdf$neutrals=='Neutrals Excluded'],
+   plotdf$question[plotdf$neutrals=='Neutrals Excluded'],
+   function(x) rep(x[2],4)
+ ) %>% unlist

> neworder<-order(
+   plotdf$dimension,
+   #plotdf$summary,
+   plotdf$blackmu
+ )

> tmplevels<-plotdf$question[neworder] %>%
+   unique

> tmplabels<-sapply(tmplevels,function(x) {
+   getcode(x,"question","shortname",questionsdf)
+ })

> plotdf$question<-factor(
+   plotdf$question,
+   levels=tmplevels,
+   labels=tmplabels
+ )

> # #add face
> # tmpface<-rep("plain",length(tmplabels))
> # tmp<-!str_detect(tmplabels,"\\(")
> # tmpface[tmp]<-"bold"
> 
> # #get location of dividing lines
> # tmpdf<-unique(plotdf[neworder,c("question","dimension")])
> # diffs<-tmpdf$dimension %>%
> #   as.numeric %>% diff
> # hlines<-which(diffs==1) + 0.5
> 
> #order race
> plotdf$race<-factor(
+   plotdf$race,
+   levels=c(1,2),
+   labels=c("White","Black")
+ )

> tmpcolors<-c('red','blue')

> names(tmpcolors)<-levels(plotdf$black)

> #dodge positions
> dodge<-position_dodge(.6)

> g.tmp <- ggplot(
+   plotdf,
+   aes(
+     x=question,
+     y=mu,
+     ymin=mu.min,
+     ymax=mu.max,
+     color=race
+   )
+ ) +
+   geom_errorbar(
+     width=0,
+     position=dodge
+   ) + 
+   geom_point(
+     size=1,
+     position=dodge
+   ) +
+   geom_hline(
+     yintercept=50,
+     linetype='dashed',
+     color='grey'
+   ) +
+   scale_color_manual(
+     name="",
+     values=tmpcolors,
+     guide=guide_legend(
+       direction="horizontal"
+     )
+   ) +
+   xlab("") +
+   ylab("\n% Anxious, Mistrustful or Punitive") +
+   coord_flip() +  
+   facet_grid(
+     rows=vars(dimension),
+     cols=vars(neutrals),
+     scales='free_y',
+     space='free_y'
+   ) +
+   theme_bw(
+     #base_family="CM Roman",
+     base_size=14
+   ) +
+   theme(legend.position="top") +
+   #theme(axis.text.y=element_text(face=tmpface)) +
+   theme(axis.text.y=element_text(size=8))

> setwd(outputdir)

> ggsave(
+   plot=g.tmp,
+   filename='fig_po_averages2.png',
+   width=8,
+   height=9
+ )

> #correlation/diffs
> tmpdf<-plotdf[,c('question','race','mu','neutrals'),with=F]

> tmpdf<-spread(
+   tmpdf,
+   neutrals,
+   mu
+ )

> cor(
+   tmpdf$`Neutrals Excluded`,
+   tmpdf$`Neutrals Included`
+ )
[1] 0.9000382

> tmpdf$diff <- tmpdf$`Neutrals Excluded`-tmpdf$`Neutrals Included`

> tmpdf<-tmpdf[order(tmpdf$diff),]

> median(tmpdf$diff)
[1] 5.912248

> tmpdf<-tmpdf[,c('question','race','diff')]

> tmpdf<-spread(
+   tmpdf,
+   race,
+   diff
+ )

> cor(tmpdf$White,tmpdf$Black)
[1] 0.8745771

> tmpdf$diff<-tmpdf$White -tmpdf$Black

> median(tmpdf$diff)
[1] -0.3037399

> #PLOT DIFFERENCES
> # #aux plot will take difftype avg
> # tmp<-diffdf$difftype=="avg"
> # auxplotdf<-diffdf[tmp,]
> # tmp<-diffdf$difftype!="avg" 
> # plotdf<-diffdf[tmp,]
> plotdf<-diffdf

> #fix question
> #order by dimension,
> #then mu
> #and make factor 
> plotdf$dimension<-factor(
+   plotdf$dimension,
+   levels=c("anxiety","mistrust","punitive"),
+   labels=c("Anxiety","Mistrust","Punitiveness")
+ )

> neworder<-order(
+   plotdf$dimension,
+   #plotdf$summary,
+   plotdf$mu
+ )

> tmplevels<-plotdf$question[neworder] %>%
+   unique

> tmplabels<-sapply(tmplevels,function(x) {
+   getcode(x,"question","shortname",questionsdf)
+ })

> plotdf$question<-factor(
+   plotdf$question,
+   levels=tmplevels,
+   labels=tmplabels
+ )

> #########################################################
> 
> # PLOT DIFFS
> 
> #put diffs together
> plotdf <- rbindlist(
+   list(
+     diffdf,
+     diffdf2
+   ),
+   fill=T
+ )

> #make this black-white rather than white-black
> plotdf$mu <- plotdf$mu * -1

> plotdf$mu.min <- plotdf$mu.min * -1

> plotdf$mu.max <- plotdf$mu.max * -1

> #fix question
> #order by dimension,
> #then mu
> #and make factor 
> plotdf$dimension<-factor(
+   plotdf$dimension,
+   levels=c("anxiety","punitive","mistrust"),
+   labels=c("Anxiety","Punitiveness","Mistrust")
+ )

> neworder<-order(
+   plotdf$dimension[plotdf$neutrals=='Neutrals Excluded'],
+   #plotdf$summary,
+   plotdf$mu[plotdf$neutrals=='Neutrals Excluded']
+ )

> tmplevels<-plotdf$question[plotdf$neutrals=='Neutrals Excluded'][neworder] %>%
+   unique

> tmplabels<-sapply(tmplevels,function(x) {
+   getcode(x,"question","shortname",questionsdf)
+ })

> plotdf$question<-factor(
+   plotdf$question,
+   levels=tmplevels,
+   labels=tmplabels
+ )

> #add pval info to shape of point
> plotdf$pval.shp<-NA

> plotdf$pval.shp[plotdf$pval.class=="at alpha=0.01"]<-1

> plotdf$pval.shp[plotdf$pval.class=="at alpha=0.05"]<-2

> plotdf$pval.shp[plotdf$pval.class=="at alpha=0.10"]<-3

> plotdf$pval.shp[plotdf$pval.class=="not sig"]<-4

> plotdf$pval.shp<-factor(
+   plotdf$pval.shp,
+   levels=c(1,2,3,4),
+   labels=c("at alpha=0.01","at alpha=0.05","at alpha=0.10","not sig")
+ )

> tmpshapes<-c(8,4,16,1)

> names(tmpshapes)<-levels(plotdf$pval.shp)

> shp.labels<-c(
+   bquote(alpha == 0.01),
+   bquote(alpha == 0.05),
+   bquote(alpha == 0.10)
+ )

> g.tmp<-ggplot() + 
+   geom_errorbar(
+     data=plotdf,
+     aes(
+       x=question,
+       ymin=mu.min,
+       ymax=mu.max
+     ),
+     width=0
+   ) + 
+   geom_point(
+     data=plotdf,
+     aes(
+       x=question,
+       y=mu,
+       shape=pval.shp
+     ),
+     size=1
+   ) +
+   geom_hline(
+     yintercept=0,
+     linetype='dashed',
+     color='grey'
+   ) +
+   # geom_vline(
+   #   xintercept=hlines[1],
+   #   linetype='dashed',
+   #   alpha=0.3
+   # ) + 
+   # geom_vline(
+   #   xintercept=hlines[2],
+   #   linetype='dashed',
+   #   alpha=0.3
+   # ) +
+   scale_shape_manual(
+     name="",
+     values=tmpshapes,
+     labels=shp.labels,
+     guide=guide_legend(
+       direction="horizontal"
+     )
+   ) +
+   xlab("") +
+   ylab("\nBlack-White Gap") +
+   coord_flip() +  
+   facet_grid(
+     rows=vars(dimension),
+     cols=vars(neutrals),
+     scales='free_y',
+     space='free_y'
+   ) +
+   theme_bw(
+     #base_family="CM Roman",
+     base_size=14
+   ) +
+   theme(legend.position="top") + 
+   theme(axis.text.y=element_text(size=8))

> setwd(outputdir)

> ggsave(
+   plot=g.tmp,
+   filename='fig_po_diffs2.png',
+   width=8,
+   height=9
+ )

> #correlation
> 
> #differences
> 
> 
> #########################################################
> #########################################################
> 
> #### ----- ENNS-STYLE GRAPH WITH MANUAL TRENDS ----
> 
> plotdf <- finaldf[
+   !is.na(race) & 
+     race%in%c(1,2) &
+     neut==0
+   ,
+   .(
+     mu=weighted.mean(
+       x=100*aff,
+       w=weights
+     )
+   )
+   ,
+   by=c(
+     'year',
+     'dimension',
+     'question',
+     'race'
+   )
+ ]

> tmpdf<-finaldf[,c('question','year')] %>% unique

> tmptab <- table(tmpdf$question)

> goodquestions <- names(tmptab[tmptab>=3])

> plotdf <- plotdf[question%in%goodquestions]

> plotdf$dimension<-factor(
+   plotdf$dimension,
+   levels=c("anxiety","punitive","mistrust"),
+   labels=c("Anxiety","Punitiveness","Mistrust")
+ )

> #order race
> plotdf$race<-factor(
+   plotdf$race,
+   levels=c(1,2),
+   labels=c("White","Black")
+ )

> g.tmp <- ggplot(
+   plotdf,
+   aes(
+     x=year,
+     y=mu,
+     color=question,
+     group=question
+   )
+ ) +
+   geom_point(
+     size=1
+   ) +
+   geom_smooth(
+     method='lm',
+     size=1,
+     se=F
+   ) +
+   facet_grid(
+     rows=vars(dimension),
+     cols=vars(race)
+   ) +
+   scale_color_discrete(name="") +
+   theme_bw() +
+   #theme(
+   #  legend.position='bottom'
+   #) +
+   xlab('\nYear') +
+   ylab('% Anxious, Mistrustful or Punitive\n')

> setwd(outputdir)

> ggsave(
+   plot=g.tmp,
+   filename='fig_po_raw.png',
+   width=8,
+   height=6
+ )
`geom_smooth()` using formula = 'y ~ x' 

> #########################################################
> #########################################################
> 
> #### ----- BLACK ELITES, BLACK NONELITES ----
> 
> #exclude neutrals, 
> #otherwise same analysis
> 
> #limit to questions for which you have enough respondents!
> 
> tmpdf <- finaldf[
+   !is.na(race) & 
+     race%in%c(2) &
+     !is.na(ed) & 
+     ed%in%c(1,4) &
+     age==3 &
+     year>1980 & 
+     year<2000 &
+     neut==0
+   ,
+ ][
+   ,
+   .(
+     .N
+   )
+   ,
+   by=c(
+     'dimension',
+     'question',
+     'ed'
+   )
+ ]

> #questions for which there aren't enough respondents
> badQuestions <- tmpdf$question[tmpdf$N<15]

> rawdf <- finaldf[
+   !is.na(race) & 
+     race%in%c(2) &
+     !is.na(ed) & 
+     ed%in%c(1,4) &
+     #age==3 &
+     year>1980 & 
+     year<2000 &
+     neut==0 &
+     !question%in%badQuestions
+   ,
+   .(
+     index=1:1000,
+     mu=mean_se.boot(
+       100 * aff,
+       weights
+     )
+   )
+   ,
+   by=c(
+     'dimension',
+     'question',
+     'ed'
+   )
+ ]

> #FIG 3 - AVERAGES
> 
> plotdf <- rawdf[
+   ,
+   summarize.distribution3(mu)
+   ,
+   by=c(
+     'dimension',
+     'question',
+     'ed'
+   )
+ ]

> plotdf$ed<-factor(
+   plotdf$ed,
+   levels=c(1,4),
+   labels=c("HS Dropout","College Grad")
+ )

> #fix question
> #order by dimension,
> #then mu
> #and make factor 
> plotdf$dimension<-factor(
+   plotdf$dimension,
+   levels=c("anxiety","punitive","mistrust"),
+   labels=c("Anxiety","Punitiveness","Mistrust")
+ )

> neworder<-order(
+   plotdf$dimension[plotdf$ed=='HS Dropout'],
+   #plotdf$summary,
+   plotdf$mu[plotdf$ed=='HS Dropout']
+ )

> tmplevels<-plotdf$question[plotdf$ed=='HS Dropout'][neworder] %>%
+   unique

> tmplabels<-sapply(tmplevels,function(x) {
+   getcode(x,"question","shortname",questionsdf)
+ })

> plotdf$question<-factor(
+   plotdf$question,
+   levels=tmplevels,
+   labels=tmplabels
+ )

> #add pval info to shape of point
> plotdf$pval.shp<-NA

> plotdf$pval.shp[plotdf$pval.class=="at alpha=0.01"]<-1

> plotdf$pval.shp[plotdf$pval.class=="at alpha=0.05"]<-2

> plotdf$pval.shp[plotdf$pval.class=="at alpha=0.10"]<-3

> plotdf$pval.shp[plotdf$pval.class=="not sig"]<-4

> plotdf$pval.shp<-factor(
+   plotdf$pval.shp,
+   levels=c(1,2,3,4),
+   labels=c("at alpha=0.01","at alpha=0.05","at alpha=0.10","not sig")
+ )

> tmpshapes<-c(8,4,16,1)

> names(tmpshapes)<-levels(plotdf$pval.shp)

> shp.labels<-c(
+   bquote(alpha == 0.01),
+   bquote(alpha == 0.05),
+   bquote(alpha == 0.10)
+ )

> #order ed
> tmpcolors<-c('#009E73','#CC79A7')

> names(tmpcolors)<-levels(plotdf$ed)

> #dodge positions
> dodge<-position_dodge(.6)

> g.tmp <- ggplot(
+   plotdf,
+   aes(
+     x=question,
+     y=mu,
+     ymin=mu.min,
+     ymax=mu.max,
+     color=ed
+   )
+ ) +
+   geom_errorbar(
+     width=0,
+     position=dodge
+   ) + 
+   geom_point(
+     size=1,
+     position=dodge
+   ) +
+   geom_hline(
+     yintercept=50,
+     linetype='dashed',
+     color='grey'
+   ) +
+   scale_color_manual(
+     name="",
+     values=tmpcolors,
+     guide=guide_legend(
+       direction="horizontal"
+     )
+   ) +
+   xlab("") +
+   ylab("\n% Anxious, Mistrustful or Punitive") +
+   coord_flip() +  
+   facet_grid(
+     rows=vars(dimension),
+     scales='free_y',
+     space='free_y'
+   ) +
+   theme_bw(
+     #base_family="CM Roman",
+     base_size=14
+   ) +
+   theme(legend.position="top") +
+   #theme(axis.text.y=element_text(face=tmpface)) +
+   theme(axis.text.y=element_text(size=8))

> setwd(outputdir)

> ggsave(
+   plot=g.tmp,
+   filename='fig_po_averages_elites.png',
+   width=6,
+   height=8,
+   dpi=300
+ )

> ggsave(
+   plot=g.tmp,
+   filename='CleggFig3.pdf',
+   width=6,
+   height=8,
+   dpi=300
+ )

> #########################################################
> 
> #CRIME BILL
> #crime bill, specifically
> #too small to break out by age/ed
> #so get the toplines by race for Section 6
> 
> #how many in support?
> finaldf[
+   !is.na(race) & 
+     race==2 & 
+     question=='crimebill.gallup' 
+   ,
+   .(
+     index=1:1000,
+     mu=mean_se.boot(
+       100 * aff,
+       weights
+     )
+   )
+   
+ ][
+   ,
+   summarize.distribution3(mu)
+ ]
         mu   mu.min   mu.max     se   pval    pval.class
      <num>    <num>    <num> <lgcl> <lgcl>        <char>
1: 62.67606 54.92958 70.44014     NA     NA at alpha=0.01

> #how many opposed?
> finaldf[
+   !is.na(race) & 
+     race==2 & 
+     question=='crimebill.gallup' 
+   ,
+   .(
+     index=1:1000,
+     mu=mean_se.boot(
+       100 * neg,
+       weights
+     )
+   )
+   
+ ][
+   ,
+   summarize.distribution3(mu)
+ ]
         mu   mu.min   mu.max     se   pval    pval.class
      <num>    <num>    <num> <lgcl> <lgcl>        <char>
1: 18.30986 12.65845 25.35211     NA     NA at alpha=0.01

> #########################################################
> 
> #DIFFERENCES
> 
> tmpdf <- pivot_wider(
+   rawdf,
+   names_from=ed,
+   values_from=mu
+ ) %>% data.table

> tmpdf$mu <-  tmpdf$`4` - tmpdf$`1`

> plotdf <- tmpdf[
+   ,
+   summarize.distribution3(mu)
+   ,
+   by=c(
+     'dimension',
+     'question'
+   )
+ ]

> #fix question
> #order by dimension,
> #then mu
> #and make factor 
> plotdf$dimension<-factor(
+   plotdf$dimension,
+   levels=c("anxiety","punitive","mistrust"),
+   labels=c("Anxiety","Punitiveness","Mistrust")
+ )

> neworder<-order(
+   plotdf$dimension,
+   #plotdf$summary,
+   plotdf$mu
+ )

> tmplevels<-plotdf$question[neworder] %>%
+   unique

> tmplabels<-sapply(tmplevels,function(x) {
+   getcode(x,"question","shortname",questionsdf)
+ })

> plotdf$question<-factor(
+   plotdf$question,
+   levels=tmplevels,
+   labels=tmplabels
+ )

> #add pval info to shape of point
> plotdf$pval.shp<-NA

> plotdf$pval.shp[plotdf$pval.class=="at alpha=0.01"]<-1

> plotdf$pval.shp[plotdf$pval.class=="at alpha=0.05"]<-2

> plotdf$pval.shp[plotdf$pval.class=="at alpha=0.10"]<-3

> plotdf$pval.shp[plotdf$pval.class=="not sig"]<-4

> plotdf$pval.shp<-factor(
+   plotdf$pval.shp,
+   levels=c(1,2,3,4),
+   labels=c("at alpha=0.01","at alpha=0.05","at alpha=0.10","not sig")
+ )

> tmpshapes<-c(8,4,16,1)

> names(tmpshapes)<-levels(plotdf$pval.shp)

> shp.labels<-c(
+   bquote(alpha == 0.01),
+   bquote(alpha == 0.05),
+   bquote(alpha == 0.10)
+ )

> g.tmp<-ggplot() + 
+   geom_errorbar(
+     data=plotdf,
+     aes(
+       x=question,
+       ymin=mu.min,
+       ymax=mu.max
+     ),
+     width=0
+   ) + 
+   geom_point(
+     data=plotdf,
+     aes(
+       x=question,
+       y=mu,
+       shape=pval.shp
+     ),
+     size=1
+   ) +
+   geom_hline(
+     yintercept=0,
+     linetype='dashed',
+     color='grey'
+   ) +
+   # geom_vline(
+   #   xintercept=hlines[1],
+   #   linetype='dashed',
+   #   alpha=0.3
+   # ) + 
+   # geom_vline(
+   #   xintercept=hlines[2],
+   #   linetype='dashed',
+   #   alpha=0.3
+   # ) +
+   scale_shape_manual(
+     name="",
+     values=tmpshapes,
+     labels=shp.labels,
+     guide=guide_legend(
+       direction="horizontal"
+     )
+   ) +
+   xlab("") +
+   ylab("\nBlack-White Gap") +
+   coord_flip() +  
+   facet_grid(
+     rows=vars(dimension),
+     scales='free_y',
+     space='free_y'
+   ) +
+   theme_bw(
+     #base_family="CM Roman",
+     base_size=14
+   ) +
+   theme(legend.position="top") + 
+   theme(axis.text.y=element_text(size=8))

> setwd(outputdir)

> ggsave(
+   plot=g.tmp,
+   filename='fig_po_diffs_elites.png',
+   width=6,
+   height=6
+ )
[1] "######"
[1] "Running:"
[1] "11_RandR_model.R"

> #########################################################
> #########################################################
> 
> #clear workspace
> rm(list=ls())

> #load packages
> require(stringr)

> require(plyr)

> require(dplyr)

> require(zoo)

> require(data.table)

> require(tidyr)

> require(rprojroot)

> #extra
> require(data.table)

> require(lme4)

> require(rstanarm)

> #set dirs
> rootdir<-find_root(
+   criterion=has_file('_rapol.Rproj')
+ )

> codedir<-file.path(rootdir,"code")

> setwd(codedir); dir()
[1] "01_publicopinion" "02_voting"        "03_dind"          "dirs.R"          
[5] "runeverything.R" 

> source('dirs.R')

> #helper functions
> setwd(pcodedir); dir()
 [1] "01_group.R"                "02_prep.R"                 "03_summarize.R"           
 [4] "04_summarize_output.R"     "05_regmods.R"              "06_regmods_output.R"      
 [7] "07_predict.R"              "08_predict_output.R"       "09_extra_output.R"        
[10] "10_RandR_summarize.R"      "11_RandR_model.R"          "12_RandR_modeloutput.R"   
[13] "functions.R"               "getcode.R"                 "getinfo2.R"               
[16] "readpoll_functions.R"      "XX_predict.R"              "XX_predict_output.R"      
[19] "XX_predict_outputTrends.R" "XX_predictscribbles.R"     "XX_regmods.R"             
[22] "XX_regmods_output.R"       "XX_runall.R"               "XX_scanmeta.R"            
[25] "XXdirs.R"                 

> source('readpoll_functions.R')

> source('getinfo2.R')

> source('functions.R')

> #set seed
> set.seed(23)

> #sample N
> setwd(filesdir)

> #########################################################
> #########################################################
> 
> #load data
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> finaldf<-fread(
+   '01po_dataframe.csv'
+ )

> finaldf$raceXquestionXyear<-paste0(
+   finaldf$race,
+   "_",
+   finaldf$question,
+   "_",
+   finaldf$year
+ )

> finaldf$questionXyear<-paste0(
+   finaldf$question,
+   "_",
+   finaldf$year
+ )

> #########################################################
> #########################################################
> 
> #SETUP
> 
> felist<-list(
+   bvrt=c(
+     "RACE"
+     )#,
+   # full=c(
+   #   "RACE",
+   #   "gender",
+   #   "ed",
+   #   "age"
+   # )
+ )

> relist<-list(
+   # simple=c(
+   #   "year",
+   #   "GEO",
+   #   "question"
+   #   #####
+   # ),
+   pref=c(
+     "year",
+     #"GEO",
+     "question",
+     #####
+     # "RACEXed",
+     # "RACEXgender",
+     # "RACEXage",
+     "RACEXyear"
+     #"RACEXGEO",
+   )
+   # full=c(
+   #   "year",
+   #   "GEO",
+   #   "question",
+   #   #####
+   #   "RACEXgender",
+   #   "RACEXage",
+   #   "RACEXquestion",
+   #   "RACEXGEOXyear",
+   #   "RACEXedXyear"
+   # )
+ )

> #########################################################
> #########################################################
> 
> #MODS
> modsdf<-expand.grid(
+   dv=c(
+     "aff"
+   ),
+   dimension=c(
+     "punitive",
+     "anxiety",
+     "mistrust"
+   ),
+   fes=c(
+     "bvrt"
+   ),
+   res=c(
+     "pref"
+   ),
+   race=c(
+     "race"
+   ),
+   sample=c(
+     "allpolls"
+   ),
+   stringsAsFactors=F
+ )

> #modname
> modsdf$mname<-apply(
+   modsdf,1,paste0,collapse="."
+ )

> modsdf$sampname<-paste0(
+   modsdf$dimension,
+   ".",
+   modsdf$sample
+ )

> unique(modsdf$sampname)
[1] "punitive.allpolls" "anxiety.allpolls"  "mistrust.allpolls"

> #trim
> modsdf$i<-1:nrow(modsdf)

> #########################################################
> #########################################################
> 
> #GET FORMS
> tmpseq.i<-1:nrow(modsdf)

> forms<-lapply(tmpseq.i,function(i) {
+   #i<-1  
+   #print(i)
+   thisrow<-modsdf[i,]
+   #get thisgeo
+   thisgeo<-ifelse(
+     thisrow$sample=="allpolls",
+     "division",
+     "state_alpha2"
+   )
+   thisrace<-ifelse(
+     thisrow$race=="race",
+     "race",
+     "race2"
+   )
+   #lhs
+   lhs<-thisrow$dv
+   
+   #rhs, fe
+   rhs.fe<-felist[[thisrow$fes]] %>% 
+     str_replace("GEO",thisgeo) %>%
+     str_replace("RACE",paste0("factor(",thisrace,")")) %>%
+     paste(collapse=" + ")
+   
+   #rhs,re
+   re.raw<-relist[[thisrow$res]] %>% 
+     str_replace("GEO",thisgeo) %>%
+     str_replace("RACE",thisrace)
+   rhs.re<-paste0(
+     "(1 | ",re.raw,")"
+   ) %>% 
+     paste(collapse=" + ")
+   rhs<-paste0(
+     rhs.fe," + ",rhs.re
+   )
+   #put together
+   thisform<-paste0(
+     lhs,
+     " ~ ",
+     rhs
+   )
+   as.formula(thisform)
+ })

> names(forms)<-modsdf$mname

> #make sure all vars are prez
> allvars<-lapply(forms,all.vars) %>%
+   unlist %>% unique

> tmp<-allvars%in%names(finaldf)

> if(sum(!tmp)>0) {
+   print(allvars[!tmp])
+   stop()
+ }

> #########################################################
> #########################################################
> 
> #SAMPS
> #each sampname in modsdf
> sampnames<-unique(modsdf$sampname)

> tmpseq.j<-seq_along(sampnames)

> sampspecs<-lapply(tmpseq.j,function(j) {
+   #j<-1
+   print(j)
+   #get all forms w/ this sampname
+   this.sampname<-sampnames[j]
+   tmprows<-modsdf$sampname==this.sampname
+   tmpmods<-modsdf$mname[tmprows]
+   tmpdim<-unique(modsdf$dimension[tmprows])
+   if(length(tmpdim)>1) stop()
+   #these are the vars
+   mycols<-lapply(forms[tmpmods],all.vars) %>% 
+     unlist %>% unique
+   mycols<-c(
+     mycols,
+     "respid",
+     "pollid"
+   ) %>% unique
+   #these are the rows; all vars present and dimension
+   myrows<-complete.cases(finaldf[,mycols,with=F])
+   finaldf$race%in%c(1,2) #only blacks/whites
+   if(tmpdim!="all") {
+     myrows<-myrows & 
+       finaldf$dimension==tmpdim
+   }  
+   #return rows/cols
+   list(
+     rows=myrows,
+     cols=mycols
+   )
+ })
[1] 1
[1] 2
[1] 3

> names(sampspecs)<-sampnames

> #get info of each samp
> sampinfodf<-lapply(tmpseq.j,function(j) {
+   listbit<-sampspecs[[j]]
+   thisdf<-finaldf[listbit$rows,listbit$cols,with=F]
+   data.frame(
+     sampname=names(sampspecs)[j],
+     N=nrow(thisdf),
+     N.resp=length(unique(thisdf$respid)),
+     N.questions=length(unique(thisdf$question)),
+     N.polls=length(unique(thisdf$pollid)),
+     stringsAsFactors=F
+   )
+ }) %>% rbind.fill

> #########################################################
> #########################################################
> 
> # this is time-intensive, so
> # preferable to attempt to run this in parallel
> require(doParallel)

> require(foreach)

> # Register a parallel backend
> num_cores <- detectCores() - 1

> cl <- makeCluster(num_cores)

> registerDoParallel(cl)

> modslist <- foreach(i=tmpseq.i,.packages='data.table') %dopar% {
+   #i<-4
+   print(
+     paste(
+       "Estimating",i,"of",length(tmpseq.i)
+     )
+   )
+   thisrow<-modsdf[i,]
+   thisform<-forms[[i]]
+   ss<-sampspecs[[thisrow$sampname]]
+   thisdf<-finaldf[ss$rows,ss$cols,with=F]
+   #####
+   #fit mod/getmodtime
+   modtime<-system.time(
+     m<-lme4::glmer(
+       data=thisdf,
+       formula=thisform,
+       family=binomial(link="logit")
+     )
+   )
+   list(
+     m=m,
+     modtime=modtime
+   )
+ }

> names(modslist)<-modsdf$mname

> #########################################################
> #########################################################
> 
> #save mods/modtimes
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> saveRDS(
+   modslist,
+   "01po_modslist_RandR.RDS"
+ )
[1] "######"
[1] "Running:"
[1] "12_RandR_modeloutput.R"

> #########################################################
> #########################################################
> 
> #clear workspace
> rm(list=ls())

> #load packages
> require(stringr)

> require(plyr)

> require(dplyr)

> require(zoo)

> require(data.table)

> require(tidyr)

> require(rprojroot)

> #extra
> require(lme4)

> require(merTools)

> require(doParallel)

> #set dirs
> rootdir<-find_root(
+   criterion=has_file('_rapol.Rproj')
+ )

> codedir<-file.path(rootdir,"code")

> setwd(codedir); dir()
[1] "01_publicopinion" "02_voting"        "03_dind"          "dirs.R"          
[5] "runeverything.R" 

> source('dirs.R')

> #helper functions
> setwd(pcodedir); dir()
 [1] "01_group.R"                "02_prep.R"                 "03_summarize.R"           
 [4] "04_summarize_output.R"     "05_regmods.R"              "06_regmods_output.R"      
 [7] "07_predict.R"              "08_predict_output.R"       "09_extra_output.R"        
[10] "10_RandR_summarize.R"      "11_RandR_model.R"          "12_RandR_modeloutput.R"   
[13] "functions.R"               "getcode.R"                 "getinfo2.R"               
[16] "readpoll_functions.R"      "XX_predict.R"              "XX_predict_output.R"      
[19] "XX_predict_outputTrends.R" "XX_predictscribbles.R"     "XX_regmods.R"             
[22] "XX_regmods_output.R"       "XX_runall.R"               "XX_scanmeta.R"            
[25] "XXdirs.R"                 

> source('readpoll_functions.R')

> source('getinfo2.R')

> source('functions.R')

> #load data and mods
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> #finaldf<-fread('finaldf_po.csv')
> mods<-readRDS('01po_modslist_RandR.RDS')

> modtimes<-lapply(mods,function(x) x$modtime['elapsed']/60)

> prefmods<-lapply(mods,function(x) x$m)

> lapply(mods,function(x) x$m %>% summary)
$aff.punitive.bvrt.pref.race.allpolls
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
 Family: binomial  ( logit )
Formula: aff ~ factor(race) + (1 | year) + (1 | question) + (1 | raceXyear)
   Data: thisdf

      AIC       BIC    logLik  deviance  df.resid 
 308588.9  308651.5 -154288.4  308576.9    251137 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.4675 -0.9729  0.5410  0.7353  2.8842 

Random effects:
 Groups    Name        Variance Std.Dev.
 raceXyear (Intercept) 0.02461  0.1569  
 year      (Intercept) 0.14293  0.3781  
 question  (Intercept) 0.58323  0.7637  
Number of obs: 251143, groups:  raceXyear, 147; year, 50; question, 19

Fixed effects:
              Estimate Std. Error z value Pr(>|z|)    
(Intercept)    0.02325    0.15609   0.149    0.882    
factor(race)2 -0.80707    0.03456 -23.349   <2e-16 ***
factor(race)3 -0.38292    0.04159  -9.207   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr) fct()2
factor(rc)2 -0.025       
factor(rc)3 -0.056  0.346

$aff.anxiety.bvrt.pref.race.allpolls
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
 Family: binomial  ( logit )
Formula: aff ~ factor(race) + (1 | year) + (1 | question) + (1 | raceXyear)
   Data: thisdf

     AIC      BIC   logLik deviance df.resid 
158633.7 158691.9 -79310.9 158621.7   120878 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.1480 -1.1502  0.6639  0.7958  1.4386 

Random effects:
 Groups    Name        Variance Std.Dev.
 raceXyear (Intercept) 0.01618  0.1272  
 year      (Intercept) 0.03668  0.1915  
 question  (Intercept) 0.14141  0.3760  
Number of obs: 120884, groups:  raceXyear, 105; year, 35; question, 9

Fixed effects:
              Estimate Std. Error z value Pr(>|z|)    
(Intercept)    0.25321    0.12636   2.004   0.0451 *  
factor(race)2  0.43410    0.03661  11.858   <2e-16 ***
factor(race)3  0.07153    0.04446   1.609   0.1077    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr) fct()2
factor(rc)2 -0.096       
factor(rc)3 -0.084  0.322

$aff.mistrust.bvrt.pref.race.allpolls
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
 Family: binomial  ( logit )
Formula: aff ~ factor(race) + (1 | year) + (1 | question) + (1 | raceXyear)
   Data: thisdf

     AIC      BIC   logLik deviance df.resid 
109552.8 109610.5 -54770.4 109540.8   110684 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.2454 -0.6795 -0.2753  0.6215  7.2857 

Random effects:
 Groups    Name        Variance Std.Dev.
 raceXyear (Intercept) 0.05808  0.2410  
 year      (Intercept) 0.16126  0.4016  
 question  (Intercept) 1.24233  1.1146  
Number of obs: 110690, groups:  raceXyear, 118; year, 40; question, 11

Fixed effects:
              Estimate Std. Error z value Pr(>|z|)    
(Intercept)   -1.18330    0.23987  -4.933 8.09e-07 ***
factor(race)2  0.64192    0.06081  10.556  < 2e-16 ***
factor(race)3  0.29175    0.06903   4.227 2.37e-05 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr) fct()2
factor(rc)2 -0.051       
factor(rc)3 -0.037  0.362


> #set seed
> set.seed(23)

> merTools_sims<-100

> #########################################################
> #########################################################
> 
> #PREDICT FOR MRP-CELLS
> #post-stratifying using census data
> 
> #load the census data
> setwd(datadir); dir()
 [1] "abcdfs.csv"                                 "All_Bills(1947-2017).csv"                  
 [3] "american_violence_data_20241016_201046.csv" "anesdf.csv"                                
 [5] "beodf.csv"                                  "beodf5.csv"                                
 [7] "billsdf_revised.csv"                        "cbcdf.csv"                                 
 [9] "cbsdfs.csv"                                 "citecount.csv"                             
[11] "congressmen_list.csv"                       "conventional.bib"                          
[13] "cpopdf.csv"                                 "Ethnic_Collect_Action.dta"                 
[15] "fulldf_bills.csv"                           "gallupdfs.csv"                             
[17] "gssdf.csv"                                  "Hall_votes.csv"                            
[19] "histpundf_national_220328.csv"              "housevotes_handcoded_revised.csv"          
[21] "HSall_members.csv"                          "HSall_rollcalls_withissues.csv"            
[23] "incrates_subnationalstate.csv"              "lbj1964.csv"                               
[25] "nbclatdfs.csv"                              "prezdf.csv"                                
[27] "questions_fortex.txt"                       "race_riot.dta"                             
[29] "redf.csv"                                   "roperdfs.csv"                              
[31] "tab_longviolence.csv"                       "timedfs.csv"                               
[33] "votes"                                     

> #read the census data from the data directory
> cpopdf<-read.csv("cpopdf.csv",stringsAsFactors=FALSE)

> #drop the column named X
> cpopdf[["X"]]<-NULL

> #match our vars: strip a trailing ".mrp" from the column names
> names(cpopdf)<-sub("\\.mrp$","",names(cpopdf))

> #make sure all vars are in?
> #NB: only the frame of the first model in prefmods is inspected
> tmpdf<-prefmods[[1]]@frame

> #strip the factor() wrapper from the frame's column names
> tmpnames<-str_replace(names(tmpdf),"factor\\(","") %>%
+   str_replace("\\)$","") 

> #interaction terms are named aXb; split them into their components
> tmpnames<-str_split(tmpnames,"X") %>%
+   unlist %>% unique

> #aff (the model response) and question are exempted from the check
> tmp<-tmpnames%in%c(names(cpopdf),"aff","question")

> prez<-tmpnames[tmp]

> miss<-tmpnames[!tmp]

> #name the offending variables so the failure can be acted on
> if(length(miss)!=0)
+   stop('missing vars: ',paste(miss,collapse=', '))

> #prep cpopdf for prediction
> #now, add dem-based interactions:
> #pair each of race/race2 with every other column of cpopdf
> loopdf<-expand.grid(
+   v1=c(
+     "race",
+     "race2"
+   ),
+   v2=c(
+     names(cpopdf)[!names(cpopdf)%in%c("race","race2")]
+   ),
+   stringsAsFactors=FALSE
+ )

> #seq_len() rather than 1:nrow(), which would give c(1,0) for an empty grid
> tmpseq.i<-seq_len(nrow(loopdf))

> for(i in tmpseq.i) {
+   #print(i)
+   #i<-1
+   thisrow<-loopdf[i,]
+   #new column is named <v1>X<v2>, e.g. raceXyear
+   newname<-paste0(
+     thisrow$v1,
+     "X",
+     thisrow$v2
+   )
+   #make these all distinct categories: values are "<v1 value>_<v2 value>"
+   cpopdf[[newname]]<-paste0(
+     cpopdf[[thisrow$v1]],
+     "_",
+     cpopdf[[thisrow$v2]]
+   )
+ }

> #add edXyear and divisionXyear, plus the three-way keys that are missing;
> #each new column is its component columns joined by "_"
> tmpkeys<-list(
+   divisionXyear=c("division","year"),
+   edXyear=c("ed","year"),
+   raceXdivisionXyear=c("race","division","year"),
+   raceXedXyear=c("race","ed","year")
+ )

> for(tmpkey in names(tmpkeys)) {
+   tmpcols<-unname(as.list(cpopdf[tmpkeys[[tmpkey]]]))
+   cpopdf[[tmpkey]]<-do.call(paste,c(tmpcols,sep="_"))
+ }

> #loop through and predictdf
> tmpseq.i<-seq_along(prefmods)

> fulloutput<-lapply(tmpseq.i,function(i) {
+   
+   #for the i-th model in prefmods: predict P(aff) for every row of cpopdf
+   #that the model can be applied to, and collapse to race-year means
+   #returns a list with spredictdf (row-level) and spredictdf_ry (race-year)
+   
+   #i<-3
+   print(i)
+   ##########
+   
+   thismod<-prefmods[[i]]
+   thismodname<-names(prefmods)[i]
+   #the dimension is read off the model's name
+   thisdimension<-str_extract(
+     thismodname,
+     "all|anxiety|punitive|mistrust"
+   )
+   predictdf<-cpopdf
+   
+   #restrict it for temp speed
+   # tmp<-predictdf$year%in%c(1960,1990) & 
+   #   predictdf$state_alpha2=="AL"
+   # predictdf<-predictdf[tmp,]
+   
+   ######
+   
+   #FIX NEW LEVELS ISSUE
+   
+   #loop through all ranef grouping factors that are columns of predictdf
+   #and flag the rows whose level the model was not fit on
+   ranefs<-names(ranef(thismod))
+   ranefs<-ranefs[ranefs%in%names(predictdf)]
+   logicals<-lapply(ranefs,function(thisranef) {
+     modlevels<-row.names(
+       ranef(thismod)[[thisranef]]
+     )
+     ##rows to discard
+     !predictdf[[thisranef]]%in%modlevels
+   })
+   #guard the empty case: Reduce() on an empty list returns NULL,
+   #and predictdf[!NULL,] would silently drop every row
+   if(length(logicals)>0) {
+     badrows<-Reduce(f="|",logicals)
+     predictdf<-predictdf[!badrows,]
+   }
+   
+   #add missing refs to predictdf:
+   #grouping factors that are neither columns of predictdf nor
+   #interactions (names containing X) take the value that
+   #get.medianrefs() returns for them
+   medrefs<-get.medianrefs(thismod)
+   ranefs<-names(ranef(thismod))
+   tmp<-ranefs%in%names(predictdf) |
+     str_detect(ranefs,"X")
+   ranefs_miss<-ranefs[!tmp]
+   medrefs_miss<-medrefs[ranefs_miss]
+   #k, not i, so the model index of the enclosing function is not clobbered
+   for(k in seq_along(medrefs_miss)) {
+     newvar<-names(medrefs_miss)[k]
+     newval<-medrefs_miss[[k]]
+     predictdf[[newvar]]<-newval
+   }
+   
+   #predict!
+   #the question-based interaction keys can only be built now,
+   #after question has been filled in above
+   predictdf$dimension<-thisdimension
+   predictdf$raceXquestion<-paste0(
+     predictdf$race,"_",
+     predictdf$question
+   )
+   predictdf$raceXquestionXyear<-paste0(
+     predictdf$race,"_",
+     predictdf$question,"_",
+     predictdf$year
+   )
+   predictdf$questionXyear<-paste0(
+     predictdf$question,"_",
+     predictdf$year
+   )
+   
+   #single prediction
+   spredictdf<-predictdf
+   spredictdf$mu<-predict(
+     thismod,
+     newdata=spredictdf,
+     type='response' 
+   ) 
+   spredictdf<-data.table(spredictdf)
+   #collapse to get desired summaries
+   #original spredictdf is also saved
+   spredictdf_ry<-spredictdf[
+     ,
+     .(
+       mu=weighted.mean(mu,pop)
+     )
+     ,
+     by=c(
+       'race',
+       'year'
+     )
+   ]
+   
+   # #NB: this is very computationally-intensive,
+   # #so we use parallel computation to speed it up a bit
+   # mpredictdf<-predictdf
+   # numCores<-getDoParWorkers()
+   # registerDoParallel(cores=numCores)
+   # st<-proc.time()
+   # tmpoutput<-merTools::predictInterval(
+   #   thismod,
+   #   newdata=mpredictdf,
+   #   which="full",
+   #   level=0.95,
+   #   type='probability',
+   #   include.resid.var=F,
+   #   n.sims=merTools_sims, 
+   #   #.parallel = T,
+   #   #100 X 200k yields about 20 million obs
+   #   #1000 reps and we're talking 200 million
+   #   returnSims=T
+   # )
+   # timetorun<-proc.time() - st
+   # print(timetorun)
+   # 
+   # simMat<-attr(tmpoutput,"sim.results")
+   # simMat<-apply(simMat,1,thismod@resp$family$linkinv)
+   # simMat<-t(simMat)
+   # tmpdf<-data.frame(simMat)
+   # mpredictdf<-cbind(mpredictdf,tmpdf)
+   # gathcols<-names(mpredictdf)[str_detect(names(mpredictdf),"^X")]
+   # mpredictdf<-gather_(
+   #   mpredictdf,
+   #   "rep",
+   #   "mu",
+   #   gathcols
+   # ) %>% data.table
+   # mpredictdf$rep<-str_replace(mpredictdf$rep,"X","")
+   # #collapse for race year
+   # mpredictdf_ry<-mpredictdf[
+   #   ,
+   #   .(
+   #     mu=weighted.mean(mu,pop)
+   #   )
+   #   ,
+   #   by=c(
+   #     'race',
+   #     'year',
+   #     'rep'
+   #   )
+   # ]
+   # #collapse for raceXedXdivisionXyear
+   # mpredictdf_redy<-mpredictdf[
+   #   ,
+   #   .(
+   #     mu=weighted.mean(mu,pop)
+   #   )
+   #   ,
+   #   by=c(
+   #     'race',
+   #     'ed',
+   #     'division',
+   #     'year',
+   #     'rep'
+   #   )
+   # ]
+   
+   #returnme
+   list(
+     spredictdf=spredictdf,
+     spredictdf_ry=spredictdf_ry
+     #mpredictdf_ry=mpredictdf_ry,
+     #mpredictdf_redy=mpredictdf_redy,
+     #timetorun=timetorun #how long it takes to run each
+   )
+   
+ })
[1] 1
[1] 2
[1] 3

> names(fulloutput)<-names(prefmods)

> #########################################################
> #########################################################
> 
> #I. TRENDS OVER TIME
> #use spredictdfs to plot trends
> 
> require(ggplot2)

> #loop through fulloutput
> #to generate trends for each model
> tmpseq.i<-seq_along(fulloutput)

> trendsdf<-lapply(tmpseq.i,function(idx) {
+   
+   #row-level predictions of this model, as a data.table
+   celldt<-data.table(fulloutput[[idx]]$spredictdf)
+   
+   #pop-weighted mean prediction for each race-year
+   rydt<-celldt[,.(mu=weighted.mean(mu,pop)),by=c('race','year')]
+   
+   #within race, a loess of mu on year,
+   #predicted for every year from the first to the last
+   smoothdt<-rydt[
+     ,
+     {
+       yrs<-min(year):max(year)
+       list(
+         mu.loess=predict(loess(mu ~ year),yrs),
+         year=yrs
+       )
+     }
+     ,
+     by=c('race')
+   ]
+   
+   #keep every race-year from either table, then tag the dimension,
+   #which is read off the model's name
+   outdt<-merge(rydt,smoothdt,all=TRUE)
+   outdt$dimension<-str_extract(
+     names(fulloutput)[idx],
+     "all|anxiety|punitive|mistrust"
+   )
+   outdt
+   
+ }) %>% rbind.fill %>% data.table

> #plot only the rows with race 1 or 2
> tmp<-trendsdf$race%in%c(1,2)

> plotdf<-trendsdf[tmp,]

> #a single facet column; 'Conventional' is a declared level with no rows
> plotdf$facet<-factor(
+   rep('Estimated',nrow(plotdf)),
+   levels=c('Conventional','Estimated')
+ )

> #order race
> plotdf$race<-factor(plotdf$race,levels=c(1,2),labels=c("White","Black"))

> #one color per race label
> tmpcolors<-setNames(c('red','blue'),levels(plotdf$race))

> #dimension
> plotdf$dimension<-factor(
+   plotdf$dimension,
+   levels=c("anxiety","punitive","mistrust"),
+   labels=c("Anxiety","Punitiveness","Mistrust")
+ )

> #loess trend (line) drawn over the race-year means (points),
> #colored by race, one row of panels per dimension
> g.tmp <- ggplot(
+   plotdf,
+   aes(
+     x=year,
+     y=mu.loess,
+     group=race,
+     color=race
+   )
+ ) +
+   #linewidth is the line-width aesthetic as of ggplot2 3.4.0;
+   #size is deprecated for lines
+   geom_line(
+     linewidth=1
+   ) +
+   #the points inherit plotdf from ggplot(); only y is overridden
+   geom_point(
+     aes(
+       y=mu
+     ),
+     alpha=0.25
+   ) +
+   scale_color_manual(
+     name="",
+     values=tmpcolors
+   ) +
+   facet_grid( 
+     dimension ~ facet
+   ) +
+   xlab("") +
+   ylab("P(Affirmative)\n") +
+   theme_bw() +
+   theme(
+     legend.position='bottom',
+     legend.direction='horizontal'
+   ) 

> setwd(outputdir)

> tmpname<-"fig_po_trends_sparsest.png"

> ggsave(
+   plot=g.tmp,
+   filename=tmpname,
+   width=4,
+   height=8
+ )
Removed 59 rows containing missing values or values outside the scale range (`geom_point()`). 

> #########################################################
> #########################################################
> 
> #compare sparsest to trends
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> fulloutput<-readRDS('01po_predictions.RDS')

> #same trend summary as above, for the predictions just read from disk
> #iterate over the reloaded fulloutput itself: tmpseq.i was computed from
> #the previous fulloutput and need not match the length of this one
> trendsdf2<-lapply(seq_along(fulloutput),function(i) {
+   #i<-1
+   tmpoutput<-fulloutput[[i]]
+   #the dimension is read off the model's name
+   thisdimension<-str_extract(
+     names(fulloutput)[i],
+     "all|anxiety|punitive|mistrust"
+   )
+   #use spredictdf
+   spredictdf<-tmpoutput$spredictdf %>% 
+     data.table
+   #pop-weighted mean prediction for each race-year
+   tmpdf<-spredictdf[
+     ,
+     .(
+       mu=weighted.mean(mu,pop)
+     )
+     ,
+     by=c(
+       'race',
+       'year'
+     )
+   ]
+   #add a loess of mu on year within race,
+   #predicted for every year from the first to the last
+   returndf<-tmpdf[
+     ,
+     .(
+       mu.loess = loess(
+         mu ~ year
+       ) %>% predict(min(year):max(year)),
+       year=min(year):max(year)
+     )
+     ,
+     by=c(
+       'race'
+     )
+   ]
+   #put them together, keeping every race-year from either table
+   returndf<-merge(
+     tmpdf,
+     returndf,
+     all=TRUE
+   )
+   returndf$dimension<-thisdimension
+   
+   ###
+   returndf
+   
+ }) %>% rbind.fill %>% data.table

> #compare the two
> #NOTE(review): trendsdf was built from the models predicted in this script
> #(its plot was saved as fig_po_trends_sparsest.png) and trendsdf2 from
> #01po_predictions.RDS -- confirm these two labels are not swapped
> #(the correlations below are symmetric, so they do not depend on it)
> trendsdf$model <- 'preferred'

> trendsdf2$model <- 'sparsest'

> #stack the two, go long over mu/mu.loess,
> #then spread the two models into side-by-side columns
> mydf<-rbindlist(list(trendsdf,trendsdf2))

> mydf<-pivot_longer(mydf,cols=c('mu','mu.loess'))

> mydf<-pivot_wider(mydf,names_from=model,values_from=value)

> mydf<-data.table(mydf)

> #correlated at 0.98-0.99... 
> mydf[
+   ,
+   .(cor(preferred,sparsest,use='complete.obs'))
+   ,
+   by=c('race','dimension','name')
+ ]
     race dimension     name        V1
    <int>    <char>   <char>     <num>
 1:     1  punitive       mu 0.9960024
 2:     1  punitive mu.loess 0.9982197
 3:     2  punitive       mu 0.9869401
 4:     2  punitive mu.loess 0.9984372
 5:     3  punitive       mu 0.9882549
 6:     3  punitive mu.loess 0.9977777
 7:     1   anxiety       mu 0.9946085
 8:     1   anxiety mu.loess 0.9939170
 9:     2   anxiety       mu 0.9903116
10:     2   anxiety mu.loess 0.9843585
11:     3   anxiety       mu 0.9950446
12:     3   anxiety mu.loess 0.9981900
13:     1  mistrust       mu 0.9976743
14:     1  mistrust mu.loess 0.9987646
15:     2  mistrust       mu 0.9944293
16:     2  mistrust mu.loess 0.9972279
17:     3  mistrust       mu 0.9987945
18:     3  mistrust mu.loess 0.9976001

> #########################################################
> #########################################################

> ## run voting patterns scripts (Section 4)
> setwd(vcodedir); dir()
 [1] "01_getvotes.R"           "02_matcheos.R"           "03_merge.R"             
 [4] "04_predict.R"            "05_summarize.R"          "dirs.R"                 
 [7] "genconventional.R"       "XX_bigsamp_irt.R"        "XX_bigsamp_regression.R"
[10] "XX_bills.R"              "XX_cbc.R"                "XX_graphs.R"            
[13] "XX_match.R"              "XX_regmods.R"            "XX_summarize.R"         
[16] "XX_votes.R"             

> myfiles <- dir()[str_detect(dir(), '^[0-9]{2}')]

> for (myfile in myfiles) {
+   print("######")
+   print("Running:")
+   print(myfile)
+ 
+   pcodedir <- file.path(
+     find_root(criterion = has_ .... [TRUNCATED] 
[1] "######"
[1] "Running:"
[1] "01_getvotes.R"

> #########################################################
> #########################################################
> 
> #clear workspace
> rm(list=ls())

> #load packages
> require(stringr)

> require(plyr)

> require(dplyr)

> require(zoo)

> require(data.table)

> require(tidyr)

> require(rprojroot)

> #set dirs
> rootdir<-find_root(
+   criterion=has_file('_rapol.Rproj')
+ )

> codedir<-file.path(rootdir,"code")

> setwd(codedir); dir()
[1] "01_publicopinion" "02_voting"        "03_dind"          "dirs.R"          
[5] "runeverything.R" 

> source('dirs.R')

> #########################################################
> #########################################################
> 
> #LOAD HAND-CODED VOTES
> #source: our own hand-coding of punitive votes
> 
> setwd(datadir); dir()
 [1] "abcdfs.csv"                                 "All_Bills(1947-2017).csv"                  
 [3] "american_violence_data_20241016_201046.csv" "anesdf.csv"                                
 [5] "beodf.csv"                                  "beodf5.csv"                                
 [7] "billsdf_revised.csv"                        "cbcdf.csv"                                 
 [9] "cbsdfs.csv"                                 "citecount.csv"                             
[11] "congressmen_list.csv"                       "conventional.bib"                          
[13] "cpopdf.csv"                                 "Ethnic_Collect_Action.dta"                 
[15] "fulldf_bills.csv"                           "gallupdfs.csv"                             
[17] "gssdf.csv"                                  "Hall_votes.csv"                            
[19] "histpundf_national_220328.csv"              "housevotes_handcoded_revised.csv"          
[21] "HSall_members.csv"                          "HSall_rollcalls_withissues.csv"            
[23] "incrates_subnationalstate.csv"              "lbj1964.csv"                               
[25] "nbclatdfs.csv"                              "prezdf.csv"                                
[27] "questions_fortex.txt"                       "race_riot.dta"                             
[29] "redf.csv"                                   "roperdfs.csv"                              
[31] "tab_longviolence.csv"                       "timedfs.csv"                               
[33] "votes"                                     

> #read the hand-coded house votes
> votesdf<-read.csv('housevotes_handcoded_revised.csv',stringsAsFactors=FALSE)

> votesdf$housevote<-tolower(votesdf$housevote)

> #billid is session_year_housevote
> votesdf$billid<-with(
+   votesdf,
+   paste(session,year,housevote,sep="_")
+ )

> #these are dates w/ dem prez, listed as start/end pairs
> keydates<-c(
+   #dem
+   "11/22/1963",
+   "01/20/1969",
+   #dem
+   "01/21/1977",
+   "01/20/1981",
+   #dem
+   "01/21/1993",
+   "01/20/2001",
+   #dem
+   "01/21/2009",
+   "01/20/2017"
+ )

> #odd entries are start dates, even entries are end dates
> datesdf<-data.frame(
+   stdate=base::as.Date(keydates[c(TRUE,FALSE)],format="%m/%d/%Y"),
+   enddate=base::as.Date(keydates[c(FALSE,TRUE)],format="%m/%d/%Y")
+ )

> #classify
> #get all dates inside a start/end pair, as numeric day counts
> demprez.dates<-lapply(seq_len(nrow(datesdf)),function(i) {
+   thisrow<-datesdf[i,]
+   as.numeric(thisrow$stdate):as.numeric(thisrow$enddate)
+ }) %>% unlist

> #add this var
> votesdf$demprez<-0

> votesdf$date<-base::as.Date(
+   votesdf$date,
+   format="%m/%d/%Y"
+ )

> #compare day counts with day counts: demprez.dates is numeric, and
> #%in% between a Date and a numeric vector is not reliable across R versions
> #(as of R 4.3 match() transforms classed objects with mtfrm() first)
> tmp<-as.numeric(votesdf$date)%in%demprez.dates

> votesdf$demprez[tmp]<-1

> #add date
> votesdf$date<-lubridate::ymd(votesdf$date)

> #congress will denote which session of congress
> #this was called session in votesdf
> votesdf$congress<-votesdf$session

> votesdf$session<-NULL

> #we need bill number
> #this we can get from the topline 
> #of the congress votes data that we downloaded
> tmpdir<-file.path(
+   datadir,
+   "votes"
+ ); setwd(tmpdir)

> #list the vote files once, rather than re-listing the directory
> #twice per iteration
> tmpfiles<-dir()

> tmpseq.i<-seq_along(tmpfiles)

> #one row per file: the filename and the bill named in its first line
> metadf<-lapply(tmpseq.i,function(i) {
+   x<-tmpfiles[i]
+   tmp<-readLines(
+     x,n=1
+   )
+   #an empty file has no first line; record NA rather than
+   #failing on a length mismatch in data.frame()
+   if(length(tmp)==0) tmp<-NA_character_
+   #bill_name is NA when the first line names no bill in these formats
+   bill_name<-str_extract(tmp,"(H\\.\\sR\\.|H\\.Res\\.|H\\.R\\.|H\\sR|HR|S\\.)\\s[0-9]+") %>%
+     tolower
+   data.frame(
+     filename=x,
+     bill_name,
+     stringsAsFactors=FALSE
+   )
+ }) %>% rbind.fill

> #extract what you need to merge votesdf:
> #the id embedded in the filename, with its first hyphen turned into "_"
> metadf$billid<-str_replace(
+   str_extract(
+     metadf$filename,
+     "[0-9]{2,3}\\-[0-9]{4}\\_h[0-9]{2,3}"
+   ),
+   "\\-","_"
+ )

> #we want bill number
> metadf$bill_number<-str_extract(metadf$bill_name,"[0-9]+")

> #and bill prefix, with whitespace and periods removed
> metadf$bill_type<-str_replace_all(
+   str_replace_all(
+     str_extract(metadf$bill_name,"[a-z\\s\\.]+"),
+     "\\s",""
+   ),
+   "\\.",""
+ )

> #put this back into votesdf, keeping every row of votesdf
> votesdf<-merge(votesdf,metadf,by='billid',all.x=TRUE)

> #########################################################
> #########################################################
> 
> #load all votes from the rollcalls CSV file
> #these are from https://voteview.com
> setwd(datadir)

> allvotesdf<-fread(
+   "HSall_rollcalls_withissues.csv",
+   stringsAsFactors=F
+ )

> #give each row a rowid
> #this will be useful when we have to discover
> #information about bills in pundf for manual match
> allvotesdf$rowid<-1:nrow(allvotesdf)

> #renames,lowercasing
> allvotesdf$chamber<-tolower(allvotesdf$chamber)

> #fix bill number/prefix (no bill numbers for congress 99 and 100)
> #the prefix is the first run of letters; [A-Za-z] rather than [A-z],
> #whose range also takes in the punctuation between Z and a ([ \ ] ^ _ `)
> allvotesdf$bill_prefix<-str_extract(allvotesdf$bill_number,"[A-Za-z]+")

> allvotesdf$bill_prefix<-tolower(allvotesdf$bill_prefix)

> #the number is the first run of digits
> allvotesdf$bill_number<-str_extract(allvotesdf$bill_number,"[0-9]+")

> #clean up bill prefix and chamber in allvotesdf, call it bill_type
> #there are many different prefixes for the same thing
> allvotesdf$bill_type<-NA

> tmptab<-table(allvotesdf$bill_prefix) %>% sort(decreasing=TRUE)

> #bill_type -> every prefix that is recoded to it
> tmptypes<-list(
+   #house bills
+   hr=c("hr","h","hhr"),
+   #senate bills
+   s=c("s"),
+   #house resolutions
+   hres=c("hres","hre"),
+   #senate resolutions
+   sres=c("sres","sre","sr"),
+   #house concurrent resolutions
+   hcon=c("hconres","hcr","hconr","hcon","hcre","hcres"),
+   #senate concurrent resolutions
+   scon=c("sconres","scr","scre","sconr","scres","scon"),
+   #house joint resolutions
+   hjres=c("hjr","hjres","hjre","hj","hrj"),
+   #senate joint resolutions
+   sjres=c("sjres","sjre","sjr")
+ )

> for(thistype in names(tmptypes)) {
+   tmptype<-tmptypes[[thistype]]
+   #%in% is never NA, so rows with a missing prefix are simply not matched
+   tmp<-allvotesdf$bill_prefix%in%tmptype
+   allvotesdf$bill_type[tmp]<-thistype
+ }

> #unknown codes
> tmptype<-c("pn","ht","treatydoc")

> tmp<-allvotesdf$bill_prefix%in%tmptype

> allvotesdf$bill_type[tmp]<-NA

> #take a look at bill_type
> tmptab<-table(allvotesdf$bill_type,useNA = 'a')

> tmptab/sum(tmptab) #about 30% are NA's.. 

       hcon       hjres          hr        hres           s        scon       sjres 
0.010765101 0.022628299 0.447619852 0.055890303 0.152982693 0.008822315 0.010201975 
       sres        <NA> 
0.007620978 0.283468484 

> #########################################################
> #########################################################
> 
> #MERGE votesdf W/ allvotesdf
> #all votes we hand-coded in votesdf
> #should be in allvotesdf
> 
> #we loop through every vote in votesdf
> #and find its row in allvotesdf
> #we do this automatically for most, 
> #but some we have to match manually
> 
> #bill_id is congress-bill_type-bill_number, built the same way in both tables
> allvotesdf$bill_id<-paste(
+   allvotesdf$congress,
+   allvotesdf$bill_type,
+   allvotesdf$bill_number,
+   sep="-"
+ )

> votesdf$bill_id<-paste(
+   votesdf$congress,
+   votesdf$bill_type,
+   votesdf$bill_number,
+   sep="-"
+ )

> #congress-rollnumber, taking housevote without its first "h"
> votesdf$congress_rollnumber<-paste(
+   votesdf$congress,
+   str_replace(votesdf$housevote,"h",""),
+   sep="-"
+ )

> #we will use rollnumber where clerk_rollnumber doesn't exist
> allvotesdf$congress[!is.na(allvotesdf$rollnumber)] %>% unique
  [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20  21  22
 [23]  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39  40  41  42  43  44
 [45]  45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66
 [67]  67  68  69  70  71  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88
 [89]  89  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108 109 110
[111] 111 112 113 114 115 116

> allvotesdf$date[!is.na(allvotesdf$clerk_rollnumber)] %>% range
[1] "1989-01-25" "2019-02-14"

> #rows that have a clerk_rollnumber
> tmp<-!is.na(allvotesdf$clerk_rollnumber)

> #key every row on congress-rollnumber first
> allvotesdf$congress_rollnumber<-paste(
+   allvotesdf$congress,
+   allvotesdf$rollnumber,
+   sep="-"
+ )

> #then re-key the rows that have a clerk_rollnumber on that instead
> allvotesdf$congress_rollnumber[tmp]<-paste(
+   allvotesdf$congress[tmp],
+   allvotesdf$clerk_rollnumber[tmp],
+   sep="-"
+ )

> #seq_len() rather than 1:nrow(), which would give c(1,0) for an empty votesdf
> tmpseq.i<-seq_len(nrow(votesdf))

> #for every hand-coded vote, find the rowid of the same vote in allvotesdf;
> #stops with an error if any vote cannot be matched to exactly one row
> votesdf$rowid<-vapply(tmpseq.i,function(i) {
+   #i<-42
+   print(i)
+   
+   thisrow<-votesdf[i,]
+   
+   #candidate house votes sharing this congress and roll number
+   tmp<-allvotesdf$congress_rollnumber==thisrow$congress_rollnumber &
+     allvotesdf$chamber=="house"
+   
+   #fail with an error that says which vote could not be matched, and why
+   nomatch<-function(why) {
+     stop(
+       "votesdf row ",i," (billid ",thisrow$billid,"): ",why,
+       call.=FALSE
+     )
+   }
+   
+   y<-NA
+   
+   #some manual matches here
+   #NB: these are rowids, i.e. row positions in allvotesdf as read above
+   if(thisrow$billid=='101_1989_h336') y<-78076
+   if(thisrow$billid=='91_1970_h367') y<-58873
+   
+   #auto matches if no manual match
+   if(is.na(y)) {
+     if(sum(tmp)==1) {
+       #dates should match (isTRUE: a missing date counts as a mismatch)
+       if(isTRUE(allvotesdf$date[tmp]==thisrow$date)) {
+         y<-allvotesdf$rowid[tmp]
+       } else {
+         #this is wrong, and we have to match by billid
+         tmp2<-which(
+           allvotesdf$bill_id==thisrow$bill_id & 
+             allvotesdf$date==thisrow$date
+         )
+         if(length(tmp2)==1) {
+           y<-allvotesdf$rowid[tmp2]
+         } else {
+           nomatch("roll number matched a vote on another date, and bill_id + date did not identify exactly one vote")
+         }
+       }
+     } else if(sum(tmp)>1) {
+       
+       #several candidates: narrow them down by bill_id
+       tmp3<-tmp & allvotesdf$bill_id==thisrow$bill_id
+       if(sum(tmp3)==1) {
+         y<-allvotesdf$rowid[tmp3]
+       } else {
+         nomatch("several votes share this roll number, and bill_id did not identify exactly one of them")
+       }
+       
+     } else {
+       nomatch("no house vote has this congress and roll number")
+     }
+   }
+   y
+ },numeric(1))
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
[1] 11
[1] 12
[1] 13
[1] 14
[1] 15
[1] 16
[1] 17
[1] 18
[1] 19
[1] 20
[1] 21
[1] 22
[1] 23
[1] 24
[1] 25
[1] 26
[1] 27
[1] 28
[1] 29
[1] 30
[1] 31
[1] 32
[1] 33
[1] 34
[1] 35
[1] 36
[1] 37
[1] 38
[1] 39
[1] 40
[1] 41
[1] 42
[1] 43
[1] 44
[1] 45
[1] 46

> #remove superfluous vars from votesdf
> votesdf<-votesdf[
+   ,
+   !names(votesdf)%in%c(
+     "bill_id",
+     "bill_number",
+     "congress_rollnumber",
+     "bill_type",
+     "date",
+     "congress"
+   ),
+   drop=FALSE
+ ]

> #merge! every row of both tables is kept
> fulldf<-merge(votesdf,allvotesdf,by='rowid',all=TRUE)

> #inspection reveals this worked well
> 
> #########################################################
> #########################################################
> 
> #GENERATE FULL SAMPLE
> 
> #we have a sample of hand-coded major law and order votes
> #we now use these hand-coded law and order votes
> #to infer the larger set of punitive votes that we didn't hand code
> #and, later, what constitutes a 'punitive' vote (i.e., yes or no)
> 
> #these are our hand-coded bills
> tmp<-!is.na(fulldf$billtitle)

> sum(tmp)==nrow(votesdf) #everything matched
[1] TRUE

> #these are the different issue-coding variables
> names(fulldf)
 [1] "rowid"                   "billid"                  "order"                  
 [4] "year"                    "housevote"               "major"                  
 [7] "class"                   "billtitle"               "billnumber"             
[10] "amendtitle"              "shortname"               "votedeets"              
[13] "punitive"                "passed"                  "mandmin"                
[16] "demprez"                 "filename"                "bill_name"              
[19] "congress"                "chamber"                 "rollnumber"             
[22] "date"                    "session"                 "clerk_rollnumber"       
[25] "yea_count"               "nay_count"               "nominate_mid_1"         
[28] "nominate_mid_2"          "nominate_spread_1"       "nominate_spread_2"      
[31] "nominate_log_likelihood" "bill_number"             "vote_result"            
[34] "vote_desc"               "vote_question"           "dtl_desc"               
[37] "issue_codes"             "peltzman_codes"          "clausen_codes"          
[40] "crs_policy_area"         "crs_subjects"            "congress_url"           
[43] "source_documents"        "bill_prefix"             "bill_type"              
[46] "bill_id"                 "congress_rollnumber"    

> tmpvars<-c(
+   "issue_codes",
+   "peltzman_codes",
+   "clausen_codes",
+   "crs_policy_area"
+ )

> table(fulldf$crs_policy_area[tmp]) %>% sort #mostly crime and law enforcement

Armed Forces and National Security                           Congress 
                                 1                                  1 
      Finance and Financial Sector                             Health 
                                 1                                  1 
                       Immigration       Economics and Public Finance 
                                 1                                  2 
         Crime and Law Enforcement 
                                29 

> table(fulldf$issue_codes[tmp]) %>% sort #judiciary and narcotics

                  Abortion/Care of deformed newborns 
                                                   1 
                                 Banking and Finance 
                                                   1 
              Children (aid, infant mortality, etc.) 
                                                   1 
Civil Rights/Desegregation/Busing/Affirmative Action 
                                                   1 
                                           Education 
                                                   1 
                          Immigration/Naturalization 
                                                   1 
                                   Unemployment/Jobs 
                                                   1 
                                           Judiciary 
                                                   2 
                                           Narcotics 
                                                   8 

> table(fulldf$peltzman_codes[tmp]) %>% sort #domestic social policy/regulation

     Budget General Interest/Defense Policy Budget 
                                                 1 
    Budget Special Interest/Domestic Social Policy 
                                                 1 
                     D. C./Government Organization 
                                                 1 
                        Defense Policy Resolutions 
                                                 1 
                           Government Organization 
                                                 1 
                           Budget General Interest 
                                                 2 
                           Budget Special Interest 
                                                 2 
    Domestic Social Policy/Budget Special Interest 
                                                 8 
                            Domestic Social Policy 
                                                11 
Domestic Social Policy/Regulation Special Interest 
                                                17 

> table(fulldf$clausen_codes[tmp]) %>% sort #civil liberties

Foreign and Defense Policy      Government Management             Social Welfare 
                         1                          5                          6 
           Civil Liberties 
                        33 

> #peltzman and clausen seem too general
> #instead, we will look at crime/law enforcement
> #and complement this w/ judiciary and narcotics
> 
> fulldf$crs_policy_area %>% unique
 [1] NA                                           
 [2] "Congress"                                   
 [3] "Transportation and Public Works"            
 [4] "Environmental Protection"                   
 [5] "Economics and Public Finance"               
 [6] "Taxation"                                   
 [7] "Education"                                  
 [8] "Health"                                     
 [9] "Social Welfare"                             
[10] "Law"                                        
[11] "International Affairs"                      
[12] "Agriculture and Food"                       
[13] "Crime and Law Enforcement"                  
[14] "Armed Forces and National Security"         
[15] "Government Operations and Politics"         
[16] "Commerce"                                   
[17] "Housing and Community Development"          
[18] "Emergency Management"                       
[19] "Science, Technology, Communications"        
[20] "Labor and Employment"                       
[21] "Arts, Culture, Religion"                    
[22] "Public Lands and Natural Resources"         
[23] "Water Resources Development"                
[24] "Energy"                                     
[25] "Foreign Trade and International Finance"    
[26] "Immigration"                                
[27] "Sports and Recreation"                      
[28] "Animals"                                    
[29] "Finance and Financial Sector"               
[30] "Families"                                   
[31] "Native Americans"                           
[32] "Civil Rights and Liberties, Minority Issues"
[33] "Social Sciences and History"                

> fulldf$issue_codes %>% unique
   [1] "Tariffs"                                                                                                        
   [2] "Disputed Elections to Congress"                                                                                 
   [3] NA                                                                                                               
   [4] "Congressional Pay and Benefits"                                                                                 
   [5] "Constitutional Amendments"                                                                                      
   [6] "Judiciary"                                                                                                      
   [7] "Slavery"                                                                                                        
   [8] "Banking and Finance"                                                                                            
   [9] "Tax rates"                                                                                                      
  [10] "Public Works"                                                                                                   
  [11] "National Bank"                                                                                                  
  [12] "Public Lands"                                                                                                   
  [13] "Ratio of Representatives to Population"                                                                         
  [14] "Supreme Court"                                                                                                  
  [15] "Treaties"                                                                                                       
  [16] "Electoral Votes"                                                                                                
  [17] "Fish and Wildlife"                                                                                              
  [18] "Impeachment of President/Judiciary"                                                                             
  [19] "Impeachments and Investigations"                                                                                
  [20] "Military Pensions/Veterans Benefits"                                                                            
  [21] "Mediterranean Pirates"                                                                                          
  [22] "Shipping/Maritime"                                                                                              
  [23] "Whiskey Rebellion"                                                                                              
  [24] "Immigration/Naturalization"                                                                                     
  [25] "Humanitarian Assistance (foreign)"                                                                              
  [26] "Slavery/Treaties"                                                                                               
  [27] "Alien and Sedition Laws"                                                                                        
  [28] "Impeachment of President"                                                                                       
  [29] "States Rights vs. Federal Government"                                                                           
  [30] "Voting Rights"                                                                                                  
  [31] "Religion"                                                                                                       
  [32] "U.S. Currency"                                                                                                  
  [33] "Mediterranean Pirates/Treaties"                                                                                 
  [34] "Science and Technology"                                                                                         
  [35] "Campaign Contributions, Lobbying/House Ethics Campaign Laws"                                                    
  [36] "Temperance and Liquor"                                                                                          
  [37] "Children (aid, infant mortality, etc.)"                                                                         
  [38] "Agriculture"                                                                                                    
  [39] "Public Health"                                                                                                  
  [40] "Welfare"                                                                                                        
  [41] "Election of House Officers"                                                                                     
  [42] "Budget resolution "                                                                                             
  [43] "Women's Equality"                                                                                               
  [44] "Exchange Rates"                                                                                                 
  [45] "Nullification/Secession/Reconstruction"                                                                         
  [46] "Education"                                                                                                      
  [47] "Unemployment/Jobs"                                                                                              
  [48] "Election of the Speaker of the House"                                                                           
  [49] "Civil Service and Patronage"                                                                                    
  [50] "Banking and Finance/Public Lands"                                                                               
  [51] "Public Safety"                                                                                                  
  [52] "Presidential Impeachment"                                                                                       
  [53] "Amnesty (all wars)"                                                                                             
  [54] "Debt Ceilings"                                                                                                  
  [55] "Banking and Finance/National Bank"                                                                              
  [56] "Treaties/Religion"                                                                                              
  [57] "Religion/Treaties"                                                                                              
  [58] "Public Lands/Education"                                                                                         
  [59] "Civil Rights/Desegregation/Busing/Affirmative Action"                                                           
  [60] "Gasoline rationing/allocation"                                                                                  
  [61] "Treaties/Central America"                                                                                       
  [62] "Public Lands/Agriculture"                                                                                       
  [63] "Tax rates/Amnesty (all wars)"                                                                                   
  [64] "Judiciary/Slavery"                                                                                              
  [65] "Banking and Finance/Tariffs"                                                                                    
  [66] "Slavery/Nullification/Secession/Reconstruction"                                                                 
  [67] "Constitutional Amendments/Slavery"                                                                              
  [68] "Slavery/Constitutional Amendments"                                                                              
  [69] "Selective Service (The Draft)"                                                                                  
  [70] "Disputed Elections to Congress/Nullification/Secession/Reconstruction"                                          
  [71] "Impeachments and Investigations/Nullification/Secession/Reconstruction"                                         
  [72] "U.S. Currency/Banking and Finance"                                                                              
  [73] "Women's Equality/Fish and Wildlife"                                                                             
  [74] "Tax rates/Nullification/Secession/Reconstruction"                                                               
  [75] "Tax rates/Temperance and Liquor"                                                                                
  [76] "Tax rates/Slavery"                                                                                              
  [77] "Tariffs/Temperance and Liquor"                                                                                  
  [78] "Tax rates/Banking and Finance"                                                                                  
  [79] "Judiciary/Civil Rights/Desegregation/Busing/Affirmative Action"                                                 
  [80] "Slavery/Civil Rights/Desegregation/Busing/Affirmative Action"                                                   
  [81] "Judiciary/Nullification/Secession/Reconstruction"                                                               
  [82] "Minorities (non-black)"                                                                                         
  [83] "Public Lands/Civil Rights/Desegregation/Busing/Affirmative Action"                                              
  [84] "Selective Service (The Draft)/Religion"                                                                         
  [85] "Voting Rights/Civil Rights/Desegregation/Busing/Affirmative Action"                                             
  [86] "Public Lands/Religion"                                                                                          
  [87] "Tariffs/Religion"                                                                                               
  [88] "Voting Rights/Electoral Votes"                                                                                  
  [89] "Workplace conditions/8 hour day"                                                                                
  [90] "Voting Rights/Nullification/Secession/Reconstruction"                                                           
  [91] "Science and Technology/Ratio of Representatives to Population"                                                  
  [92] "Civil Rights/Desegregation/Busing/Affirmative Action/Voting Rights"                                             
  [93] "Nullification/Secession/Reconstruction/Civil Rights/Desegregation/Busing/Affirmative Action"                    
  [94] "Food Stamps/Food Programs"                                                                                      
  [95] "Nullification/Secession/Reconstruction/Congressional Pay and Benefits"                                          
  [96] "Constitutional Amendments/Civil Rights/Desegregation/Busing/Affirmative Action"                                 
  [97] "Constitutional Amendments/Voting Rights"                                                                        
  [98] "Banking and Finance/U.S. Currency"                                                                              
  [99] "Immigration/Naturalization/Civil Rights/Desegregation/Busing/Affirmative Action"                                
 [100] "Parks and Conservation"                                                                                         
 [101] "Tax rates/Tariffs"                                                                                              
 [102] "Interstate Commerce/Anti-trust/Restraint of Commerce"                                                           
 [103] "Immigration/Naturalization/Interstate Commerce/Anti-trust/Restraint of Commerce"                                
 [104] "Agriculture/Civil Rights/Desegregation/Busing/Affirmative Action"                                               
 [105] "Fish and Wildlife/Treaties"                                                                                     
 [106] "Agriculture/Interstate Commerce/Anti-trust/Restraint of Commerce"                                               
 [107] "Education/Women's Equality"                                                                                     
 [108] "Immigration/Naturalization/Treaties"                                                                            
 [109] "Treaties/Tariffs"                                                                                               
 [110] "Tax rates/Education"                                                                                            
 [111] "Tariffs/U.S. Currency"                                                                                          
 [112] "Tariffs/Shipping/Maritime"                                                                                      
 [113] "Coal Mining Regulation/Strip Mining/Black Lung/Religion"                                                        
 [114] "Religion/Voting Rights"                                                                                         
 [115] "Religion/Civil Rights/Desegregation/Busing/Affirmative Action"                                                  
 [116] "Alien and Sedition Laws/Immigration/Naturalization"                                                             
 [117] "Panama Canal/Treaties"                                                                                          
 [118] "Interstate Commerce/Anti-trust/Restraint of Commerce/Judiciary"                                                 
 [119] "Interstate Commerce/Anti-trust/Restraint of Commerce/Shipping/Maritime"                                         
 [120] "Immigration/Naturalization/Agriculture"                                                                         
 [121] "Treaties/Judiciary"                                                                                             
 [122] "Treaties/Public Lands"                                                                                          
 [123] "Union Regulation/Davis-Bacon/Situs Picketing"                                                                   
 [124] "Central America"                                                                                                
 [125] "Agriculture/Parks and Conservation"                                                                             
 [126] "Education/Temperance and Liquor"                                                                                
 [127] "Civil Rights/Desegregation/Busing/Affirmative Action/Education"                                                 
 [128] "Military Pensions/Veterans Benefits/Civil Service and Patronage"                                                
 [129] "Temperance and Liquor/Tax rates"                                                                                
 [130] "Interstate Commerce/Anti-trust/Restraint of Commerce/Religion"                                                  
 [131] "Military Pensions/Veterans Benefits/Tax rates"                                                                  
 [132] "Tax rates/Agriculture"                                                                                          
 [133] "Agriculture/Public Lands"                                                                                       
 [134] "Agriculture/Science and Technology"                                                                             
 [135] "Public Works/Pollution and Environmental Protection"                                                            
 [136] "Public Lands/Pollution and Environmental Protection"                                                            
 [137] "Public Works/Fish and Wildlife"                                                                                 
 [138] "Women's Equality/Constitutional Amendments"                                                                     
 [139] "Public Lands/Parks and Conservation"                                                                            
 [140] "Central America/Shipping/Maritime"                                                                              
 [141] "Agriculture/Public Health"                                                                                      
 [142] "Public Lands/Judiciary"                                                                                         
 [143] "Judiciary/Public Lands"                                                                                         
 [144] "Education/Tax rates"                                                                                            
 [145] "Banking and Finance/Treaties"                                                                                   
 [146] "Tax rates/Public Works"                                                                                         
 [147] "Unemployment/Jobs/Agriculture"                                                                                  
 [148] "Minimum Wage/Workplace conditions/8 hour day"                                                                   
 [149] "Immigration/Naturalization/Minorities (non-black)"                                                              
 [150] "Treaties/Immigration/Naturalization"                                                                            
 [151] "Agriculture/Tariffs"                                                                                            
 [152] "Fish and Wildlife/Tariffs"                                                                                      
 [153] "Shipping/Maritime/Tariffs"                                                                                      
 [154] "Constitutional Amendments/Temperance and Liquor"                                                                
 [155] "Tariffs/Interstate Commerce/Anti-trust/Restraint of Commerce"                                                   
 [156] "Judiciary/Interstate Commerce/Anti-trust/Restraint of Commerce"                                                 
 [157] "Judiciary/Tariffs"                                                                                              
 [158] "Education/Religion"                                                                                             
 [159] "Supreme Court/Judiciary"                                                                                        
 [160] "Shipping/Maritime/Interstate Commerce/Anti-trust/Restraint of Commerce"                                         
 [161] "Pollution and Environmental Protection/Public Lands"                                                            
 [162] "Agriculture/Judiciary"                                                                                          
 [163] "Treaties/Minorities (non-black)"                                                                                
 [164] "Firearms"                                                                                                       
 [165] "Immigration/Naturalization/Public Health"                                                                       
 [166] "Public Safety/Interstate Commerce/Anti-trust/Restraint of Commerce"                                             
 [167] "Gasoline rationing/allocation/Education"                                                                        
 [168] "Tax rates/U.S. Currency"                                                                                        
 [169] "Religion/Temperance and Liquor"                                                                                 
 [170] "Tariffs/Narcotics"                                                                                              
 [171] "Tariffs/Firearms"                                                                                               
 [172] "Science and Technology/Tariffs"                                                                                 
 [173] "Tariffs/Treaties"                                                                                               
 [174] "Religion/Education"                                                                                             
 [175] "Tax rates/Immigration/Naturalization"                                                                           
 [176] "Interstate Commerce/Anti-trust/Restraint of Commerce/Tariffs"                                                   
 [177] "Tax rates/Interstate Commerce/Anti-trust/Restraint of Commerce"                                                 
 [178] "Tax rates/Constitutional Amendments"                                                                            
 [179] "Tax rates/Judiciary"                                                                                            
 [180] "Public Works/Workplace conditions/8 hour day"                                                                   
 [181] "Human Rights/Treaties"                                                                                          
 [182] "Human Rights"                                                                                                   
 [183] "Amnesty (all wars)/Treaties"                                                                                    
 [184] "Human Rights/Judiciary"                                                                                         
 [185] "Union Regulation/Davis-Bacon/Situs Picketing/Judiciary"                                                         
 [186] "Congressional Pay and Benefits/Judiciary"                                                                       
 [187] "Shipping/Maritime/Minorities (non-black)"                                                                       
 [188] "Panama Canal/Banking and Finance"                                                                               
 [189] "Public Lands/Treaties"                                                                                          
 [190] "Agriculture/Public Safety"                                                                                      
 [191] "Impeachments and Investigations/Judiciary"                                                                      
 [192] "Agriculture/Temperance and Liquor"                                                                              
 [193] "Interstate Commerce/Anti-trust/Restraint of Commerce/Public Safety"                                             
 [194] "Interstate Commerce/Anti-trust/Restraint of Commerce/Congressional Pay and Benefits"                            
 [195] "Banking and Finance/Interstate Commerce/Anti-trust/Restraint of Commerce"                                       
 [196] "Public Lands/Public Works"                                                                                      
 [197] "Workplace conditions/8 hour day/Children (aid, infant mortality, etc.)"                                         
 [198] "Voting Rights/Temperance and Liquor"                                                                            
 [199] "Workplace conditions/8 hour day/Judiciary"                                                                      
 [200] "Coal Mining Regulation/Strip Mining/Black Lung"                                                                 
 [201] "Education/Tariffs"                                                                                              
 [202] "Tariffs/Judiciary"                                                                                              
 [203] "Public Lands/Interstate Commerce/Anti-trust/Restraint of Commerce"                                              
 [204] "Public Lands/Banking and Finance"                                                                               
 [205] "Shipping/Maritime/Judiciary"                                                                                    
 [206] "Education/Children (aid, infant mortality, etc.)"                                                               
 [207] "Shipping/Maritime/Immigration/Naturalization"                                                                   
 [208] "Civil Rights/Desegregation/Busing/Affirmative Action/Immigration/Naturalization"                                
 [209] "Workplace conditions/8 hour day/Interstate Commerce/Anti-trust/Restraint of Commerce"                           
 [210] "Fish and Wildlife/Agriculture"                                                                                  
 [211] "Unemployment/Jobs/Education"                                                                                    
 [212] "Union Regulation/Davis-Bacon/Situs Picketing/Workplace conditions/8 hour day"                                   
 [213] "Interstate Commerce/Anti-trust/Restraint of Commerce/Temperance and Liquor"                                     
 [214] "Agriculture/Education"                                                                                          
 [215] "Constitutional Amendments/Impeachment of President"                                                             
 [216] "Electoral Votes/Constitutional Amendments"                                                                      
 [217] "Temperance and Liquor/Interstate Commerce/Anti-trust/Restraint of Commerce"                                     
 [218] "Public Works/Interstate Commerce/Anti-trust/Restraint of Commerce"                                              
 [219] "Tax rates/Civil Service and Patronage"                                                                          
 [220] "National Bank/U.S. Currency"                                                                                    
 [221] "Civil Service and Patronage/National Bank"                                                                      
 [222] "Tax rates/National Bank"                                                                                        
 [223] "National Bank/Voting Rights"                                                                                    
 [224] "Coal Mining Regulation/Strip Mining/Black Lung/Public Lands"                                                    
 [225] "Panama Canal/Human Rights"                                                                                      
 [226] "Union Regulation/Davis-Bacon/Situs Picketing/Interstate Commerce/Anti-trust/Restraint of Commerce"              
 [227] "Religion/Immigration/Naturalization"                                                                            
 [228] "Agriculture/Immigration/Naturalization"                                                                         
 [229] "Panama Canal/Congressional Pay and Benefits"                                                                    
 [230] "Panama Canal"                                                                                                   
 [231] "WWI"                                                                                                            
 [232] "Agriculture/Treaties"                                                                                           
 [233] "Gasoline rationing/allocation/Interstate Commerce/Anti-trust/Restraint of Commerce"                             
 [234] "Agriculture/Banking and Finance"                                                                                
 [235] "Agriculture/Workplace conditions/8 hour day"                                                                    
 [236] "Agriculture/Civil Service and Patronage"                                                                        
 [237] "Housing/Housing Programs/Rent Control"                                                                          
 [238] "Fish and Wildlife/Parks and Conservation"                                                                       
 [239] "Interstate Commerce/Anti-trust/Restraint of Commerce/Children (aid, infant mortality, etc.)"                    
 [240] "Women's Equality/Temperance and Liquor"                                                                         
 [241] "Treaties/Voting Rights"                                                                                         
 [242] "Tariffs/Children (aid, infant mortality, etc.)"                                                                 
 [243] "Banking and Finance/WWI"                                                                                        
 [244] "WWI/CIA/Spying/Intelligence"                                                                                    
 [245] "Selective Service (The Draft)/WWI"                                                                              
 [246] "Shipping/Maritime/WWI"                                                                                          
 [247] "Temperance and Liquor/WWI"                                                                                      
 [248] "Civil Service and Patronage/Agriculture"                                                                        
 [249] "Gasoline rationing/allocation/WWI"                                                                              
 [250] "Agriculture/WWI"                                                                                                
 [251] "Civil Service and Patronage/WWI"                                                                                
 [252] "Interstate Commerce/Anti-trust/Restraint of Commerce/WWI"                                                       
 [253] "Agriculture/Price Controls"                                                                                     
 [254] "Gasoline rationing/allocation/Price Controls"                                                                   
 [255] "Public Works/WWI"                                                                                               
 [256] "Tax rates/WWI"                                                                                                  
 [257] "Tariffs/WWI"                                                                                                    
 [258] "Fish and Wildlife/Electoral Votes"                                                                              
 [259] "Civil Rights/Desegregation/Busing/Affirmative Action/WWI"                                                       
 [260] "Alien and Sedition Laws/WWI"                                                                                    
 [261] "Emergency Fuel Assistance"                                                                                      
 [262] "South Africa/Rhodesia/Amnesty (all wars)"                                                                       
 [263] "South Africa/Rhodesia"                                                                                          
 [264] "Tax rates/Workplace conditions/8 hour day"                                                                      
 [265] "Immigration/Naturalization/WWI"                                                                                 
 [266] "Women's Equality/WWI"                                                                                           
 [267] "Education/WWI"                                                                                                  
 [268] "Women's Equality/Minimum Wage"                                                                                  
 [269] "Tax rates/Campaign Contributions, Lobbying/House Ethics Campaign Laws"                                          
 [270] "Humanitarian Assistance (foreign)/WWI"                                                                          
 [271] "Election of House Officers/Banking and Finance"                                                                 
 [272] "Treaties/WWI"                                                                                                   
 [273] "Shipping/Maritime/Banking and Finance"                                                                          
 [274] "Coal Mining Regulation/Strip Mining/Black Lung/Workplace conditions/8 hour day"                                 
 [275] "Arms Control/Treaties"                                                                                          
 [276] "Tax rates/Shipping/Maritime"                                                                                    
 [277] "Children (aid, infant mortality, etc.)/Public Health"                                                           
 [278] "Tax rates/Public Lands"                                                                                         
 [279] "Arms Control"                                                                                                   
 [280] "Military Pensions/Veterans Benefits/WWI"                                                                        
 [281] "Food Stamps/Food Programs/Housing/Housing Programs/Rent Control"                                                
 [282] "Agriculture/Public Works"                                                                                       
 [283] "Tax rates/Military Pensions/Veterans Benefits"                                                                  
 [284] "Religion/Military Pensions/Veterans Benefits"                                                                   
 [285] "Campaign Contributions, Lobbying/House Ethics Campaign Laws/Disputed Elections to Congress"                     
 [286] "Agriculture/Voting Rights"                                                                                      
 [287] "Interstate Commerce/Anti-trust/Restraint of Commerce/Airlines/Airports/Airline Industry"                        
 [288] "Unemployment/Jobs/Public Works"                                                                                 
 [289] "Treaties/Temperance and Liquor"                                                                                 
 [290] "Price Controls"                                                                                                 
 [291] "Fish and Wildlife/Public Lands"                                                                                 
 [292] "Union Regulation/Davis-Bacon/Situs Picketing/Agriculture"                                                       
 [293] "Interstate Commerce/Anti-trust/Restraint of Commerce/Public Health"                                             
 [294] "Narcotics"                                                                                                      
 [295] "Election of House Officers/Interstate Commerce/Anti-trust/Restraint of Commerce"                                
 [296] "Public Lands/Impeachments and Investigations"                                                                   
 [297] "Education/Immigration/Naturalization"                                                                           
 [298] "Constitutional Amendments/Workplace conditions/8 hour day"                                                      
 [299] "Civil Rights/Desegregation/Busing/Affirmative Action/Parks and Conservation"                                    
 [300] "Immigration/Naturalization/Judiciary"                                                                           
 [301] "Radio/Television/Motion Pictures/Telecommunications"                                                            
 [302] "Airlines/Airports/Airline Industry"                                                                             
 [303] "Banking and Finance/Judiciary"                                                                                  
 [304] "Energy"                                                                                                         
 [305] "Agriculture/Handicapped"                                                                                        
 [306] "Impeachments and Investigations/Tariffs"                                                                        
 [307] "Agriculture/Energy"                                                                                             
 [308] "Union Regulation/Davis-Bacon/Situs Picketing/Banking and Finance"                                               
 [309] "Treaties/Fish and Wildlife"                                                                                     
 [310] "Impeachments and Investigations/Disputed Elections to Congress"                                                 
 [311] "Banking and Finance/Military Pensions/Veterans Benefits"                                                        
 [312] "Education/Military Pensions/Veterans Benefits"                                                                  
 [313] "Parks and Conservation/Public Works"                                                                            
 [314] "Impeachments and Investigations/Campaign Contributions, Lobbying/House Ethics Campaign Laws"                    
 [315] "Tax rates/Energy"                                                                                               
 [316] "Tax rates/Narcotics"                                                                                            
 [317] "Public Works/Energy"                                                                                            
 [318] "Peace Movements/Pacifism/Anti-Military"                                                                         
 [319] "Consumer Protection Agency/Consumer Protection/Tariffs"                                                         
 [320] "Alien and Sedition Laws/Tariffs"                                                                                
 [321] "Tariffs/Radio/Television/Motion Pictures/Telecommunications"                                                    
 [322] "Unemployment/Jobs/Immigration/Naturalization"                                                                   
 [323] "Tariffs/Exchange Rates"                                                                                         
 [324] "Unemployment/Jobs/Judiciary"                                                                                    
 [325] "Unemployment/Jobs/Banking and Finance"                                                                          
 [326] "Unemployment/Jobs/Union Regulation/Davis-Bacon/Situs Picketing"                                                 
 [327] "Banking and Finance/Exchange Rates"                                                                             
 [328] "Gasoline rationing/allocation/Tax rates"                                                                        
 [329] "Banking and Finance/Energy"                                                                                     
 [330] "Banking and Finance/Housing/Housing Programs/Rent Control"                                                      
 [331] "Social Security"                                                                                                
 [332] "Workplace conditions/8 hour day/Public Works"                                                                   
 [333] "Unemployment/Jobs/Workplace conditions/8 hour day"                                                              
 [334] "Banking and Finance/States Rights vs. Federal Government"                                                       
 [335] "Interstate Commerce/Anti-trust/Restraint of Commerce/Energy"                                                    
 [336] "Tax rates/Coal Mining Regulation/Strip Mining/Black Lung"                                                       
 [337] "Panama Canal/Shipping/Maritime"                                                                                 
 [338] "Education/Airlines/Airports/Airline Industry"                                                                   
 [339] "Pollution and Environmental Protection/Agriculture"                                                             
 [340] "Judiciary/Impeachment of President"                                                                             
 [341] "Shipping/Maritime/Public Works"                                                                                 
 [342] "Coal Mining Regulation/Strip Mining/Black Lung/Interstate Commerce/Anti-trust/Restraint of Commerce"            
 [343] "Communists/Communism/Unamerican Activities"                                                                     
 [344] "Pollution and Environmental Protection"                                                                         
 [345] "Union Regulation/Davis-Bacon/Situs Picketing/Coal Mining Regulation/Strip Mining/Black Lung"                    
 [346] "Pollution and Environmental Protection/Parks and Conservation"                                                  
 [347] "Civil Rights/Desegregation/Busing/Affirmative Action/Interstate Commerce/Anti-trust/Restraint of Commerce"      
 [348] "Workplace conditions/8 hour day/Housing/Housing Programs/Rent Control"                                          
 [349] "Banking and Finance/Public Works"                                                                               
 [350] "Tax rates/Social Security"                                                                                      
 [351] "Unemployment/Jobs/Energy"                                                                                       
 [352] "Unemployment/Jobs/Social Security"                                                                              
 [353] "Unemployment/Jobs/Price Controls"                                                                               
 [354] "Unemployment/Jobs/Minimum Wage"                                                                                 
 [355] "Civil Service and Patronage/Social Security"                                                                    
 [356] "Consumer Protection Agency/Consumer Protection"                                                                 
 [357] "Union Regulation/Davis-Bacon/Situs Picketing/Civil Rights/Desegregation/Busing/Affirmative Action"              
 [358] "Campaign Contributions, Lobbying/House Ethics Campaign Laws/States Rights vs. Federal Government"               
 [359] "Civil Service and Patronage/Campaign Contributions, Lobbying/House Ethics Campaign Laws"                        
 [360] "Agriculture/U.S. Currency"                                                                                      
 [361] "Civil Rights/Desegregation/Busing/Affirmative Action/Selective Service (The Draft)"                             
 [362] "Selective Service (The Draft)/Military Pensions/Veterans Benefits"                                              
 [363] "Tax rates/Voting Rights"                                                                                        
 [364] "Selective Service (The Draft)/Temperance and Liquor"                                                            
 [365] "Agriculture/Selective Service (The Draft)"                                                                      
 [366] "Selective Service (The Draft)/Education"                                                                        
 [367] "U. N."                                                                                                          
 [368] "Interstate Commerce/Anti-trust/Restraint of Commerce/Radio/Television/Motion Pictures/Telecommunications"       
 [369] "Agriculture/Exchange Rates"                                                                                     
 [370] "Budget resolution /Amnesty (all wars)"                                                                          
 [371] "Unemployment/Jobs/Tax rates"                                                                                    
 [372] "Fish and Wildlife/Women's Equality"                                                                             
 [373] "Banking and Finance/Radio/Television/Motion Pictures/Telecommunications"                                        
 [374] "Voting Rights/States Rights vs. Federal Government"                                                             
 [375] "Tax rates/Union Regulation/Davis-Bacon/Situs Picketing"                                                         
 [376] "U. N./Humanitarian Assistance (foreign)"                                                                        
 [377] "Radio/Television/Motion Pictures/Telecommunications/CIA/Spying/Intelligence"                                    
 [378] "Civil Rights/Desegregation/Busing/Affirmative Action/Minorities (non-black)"                                    
 [379] "Unemployment/Jobs/Military Pensions/Veterans Benefits"                                                          
 [380] "Nuclear Weapons"                                                                                                
 [381] "Nuclear Power"                                                                                                  
 [382] "Tax rates/Congressional Pay and Benefits"                                                                       
 [383] "U. N./Agriculture"                                                                                              
 [384] "U. N./Treaties"                                                                                                 
 [385] "Tax rates/Unemployment/Jobs"                                                                                    
 [386] "Nuclear Power/Energy"                                                                                           
 [387] "Tax rates/Price Controls"                                                                                       
 [388] "Arms Control/U. N."                                                                                             
 [389] "Education/Radio/Television/Motion Pictures/Telecommunications"                                                  
 [390] "Housing/Housing Programs/Rent Control/Price Controls"                                                           
 [391] "Military Pensions/Veterans Benefits/Housing/Housing Programs/Rent Control"                                      
 [392] "Public Works/Housing/Housing Programs/Rent Control"                                                             
 [393] "Treaties/U.S. Currency"                                                                                         
 [394] "Union Regulation/Davis-Bacon/Situs Picketing/Public Safety"                                                     
 [395] "Union Regulation/Davis-Bacon/Situs Picketing/Selective Service (The Draft)"                                     
 [396] "Unemployment/Jobs/States Rights vs. Federal Government"                                                         
 [397] "Unemployment/Jobs/Civil Service and Patronage"                                                                  
 [398] "Education/Science and Technology"                                                                               
 [399] "States Rights vs. Federal Government/Housing/Housing Programs/Rent Control"                                     
 [400] "Unemployment/Jobs/CIA/Spying/Intelligence"                                                                      
 [401] "Public Lands/Supreme Court"                                                                                     
 [402] "Children (aid, infant mortality, etc.)/CIA/Spying/Intelligence"                                                 
 [403] "Union Regulation/Davis-Bacon/Situs Picketing/Welfare"                                                           
 [404] "Tax rates/Housing/Housing Programs/Rent Control"                                                                
 [405] "Interstate Commerce/Anti-trust/Restraint of Commerce/Price Controls"                                            
 [406] "Humanitarian Assistance (foreign)/Children (aid, infant mortality, etc.)"                                       
 [407] "Religion/Peace Movements/Pacifism/Anti-Military"                                                                
 [408] "CIA/Spying/Intelligence"                                                                                        
 [409] "Korean War"                                                                                                     
 [410] "Civil Rights/Desegregation/Busing/Affirmative Action/Housing/Housing Programs/Rent Control"                     
 [411] "Education/CIA/Spying/Intelligence"                                                                              
 [412] "Treaties/Nuclear Weapons"                                                                                       
 [413] "Civil Rights/Desegregation/Busing/Affirmative Action/Agriculture"                                               
 [414] "Civil Rights/Desegregation/Busing/Affirmative Action/Workplace conditions/8 hour day"                           
 [415] "Unemployment/Jobs/Children (aid, infant mortality, etc.)"                                                       
 [416] "Minimum Wage/Price Controls"                                                                                    
 [417] "Union Regulation/Davis-Bacon/Situs Picketing/Price Controls"                                                    
 [418] "Treaties/Airlines/Airports/Airline Industry"                                                                    
 [419] "Civil Service and Patronage/Workplace conditions/8 hour day"                                                    
 [420] "Education/Public Health"                                                                                        
 [421] "Public Works/Central America"                                                                                   
 [422] "Minimum Wage/Housing/Housing Programs/Rent Control"                                                             
 [423] "Agriculture/Housing/Housing Programs/Rent Control"                                                              
 [424] "Union Regulation/Davis-Bacon/Situs Picketing/Housing/Housing Programs/Rent Control"                             
 [425] "Science and Technology/Airlines/Airports/Airline Industry"                                                      
 [426] "Constitutional Amendments/Treaties"                                                                             
 [427] "U. N./Communists/Communism/Unamerican Activities"                                                               
 [428] "Agriculture/Humanitarian Assistance (foreign)"                                                                  
 [429] "Human Rights/Supreme Court"                                                                                     
 [430] "Public Works/Radio/Television/Motion Pictures/Telecommunications"                                               
 [431] "Nuclear Power/Education"                                                                                        
 [432] "Union Regulation/Davis-Bacon/Situs Picketing/Communists/Communism/Unamerican Activities"                        
 [433] "Taiwan (1979-80)"                                                                                               
 [434] "Minimum Wage"                                                                                                   
 [435] "Treaties/Communists/Communism/Unamerican Activities"                                                            
 [436] "Tax rates/Treaties"                                                                                             
 [437] "Treaties/Radio/Television/Motion Pictures/Telecommunications"                                                   
 [438] "Consumer Protection Agency/Consumer Protection/Energy"                                                          
 [439] "Price Controls/Energy"                                                                                          
 [440] "Temperance and Liquor/Public Works"                                                                             
 [441] "Education/Housing/Housing Programs/Rent Control"                                                                
 [442] "Space Exploration/NASA"                                                                                         
 [443] "Civil Rights/Desegregation/Busing/Affirmative Action/Judiciary"                                                 
 [444] "National Bank/Housing/Housing Programs/Rent Control"                                                            
 [445] "Agriculture/Communists/Communism/Unamerican Activities"                                                         
 [446] "Education/Public Works"                                                                                         
 [447] "Union Regulation/Davis-Bacon/Situs Picketing/Voting Rights"                                                     
 [448] "Agriculture/Religion"                                                                                           
 [449] "Treaties/Tax rates"                                                                                             
 [450] "States Rights vs. Federal Government/Communists/Communism/Unamerican Activities"                                
 [451] "Unemployment/Jobs/Shipping/Maritime"                                                                            
 [452] "Handicapped"                                                                                                    
 [453] "Public Safety/Airlines/Airports/Airline Industry"                                                               
 [454] "Science and Technology/Public Health"                                                                           
 [455] "Public Works/Airlines/Airports/Airline Industry"                                                                
 [456] "Workplace conditions/8 hour day/Public Health"                                                                  
 [457] "Civil Rights/Desegregation/Busing/Affirmative Action/Religion"                                                  
 [458] "Gasoline rationing/allocation/Public Lands"                                                                     
 [459] "Food Stamps/Food Programs/Agriculture"                                                                          
 [460] "Food Stamps/Food Programs/Peace Movements/Pacifism/Anti-Military"                                               
 [461] "Food Stamps/Food Programs/Education"                                                                            
 [462] "Fish and Wildlife/Shipping/Maritime"                                                                            
 [463] "Nuclear Power/Space Exploration/NASA"                                                                           
 [464] "Housing/Housing Programs/Rent Control/Debt Ceilings"                                                            
 [465] "Shipping/Maritime/Treaties"                                                                                     
 [466] "Shipping/Maritime/Debt Ceilings"                                                                                
 [467] "Tax rates/Debt Ceilings"                                                                                        
 [468] "Civil Rights/Desegregation/Busing/Affirmative Action/Airlines/Airports/Airline Industry"                        
 [469] "Science and Technology/Space Exploration/NASA"                                                                  
 [470] "Nuclear Power/Public Works"                                                                                     
 [471] "Public Health/Social Security"                                                                                  
 [472] "Fish and Wildlife/Military Pensions/Veterans Benefits"                                                          
 [473] "Minimum Wage/Tariffs"                                                                                           
 [474] "Fish and Wildlife/Pollution and Environmental Protection"                                                       
 [475] "Shipping/Maritime/Airlines/Airports/Airline Industry"                                                           
 [476] "Military Pensions/Veterans Benefits/Public Health"                                                              
 [477] "Education/Workplace conditions/8 hour day"                                                                      
 [478] "Arms Control/Peace Movements/Pacifism/Anti-Military"                                                            
 [479] "Agriculture/Minimum Wage"                                                                                       
 [480] "Fish and Wildlife/Airlines/Airports/Airline Industry"                                                           
 [481] "Welfare/Banking and Finance"                                                                                    
 [482] "Nuclear Power/Science and Technology"                                                                           
 [483] "Shipping/Maritime/Public Safety"                                                                                
 [484] "Welfare/Public Health"                                                                                          
 [485] "Welfare/Social Security"                                                                                        
 [486] "Civil Rights/Desegregation/Busing/Affirmative Action/Public Health"                                             
 [487] "Science and Technology/Radio/Television/Motion Pictures/Telecommunications"                                     
 [488] "Agriculture/Radio/Television/Motion Pictures/Telecommunications"                                                
 [489] "Narcotics/Radio/Television/Motion Pictures/Telecommunications"                                                  
 [490] "U. N./Radio/Television/Motion Pictures/Telecommunications"                                                      
 [491] "Radio/Television/Motion Pictures/Telecommunications/Space Exploration/NASA"                                     
 [492] "Civil Rights/Desegregation/Busing/Affirmative Action/Radio/Television/Motion Pictures/Telecommunications"       
 [493] "Fish and Wildlife/Banking and Finance"                                                                          
 [494] "Women's Equality/Whiskey Rebellion"                                                                             
 [495] "Impeachments and Investigations/Housing/Housing Programs/Rent Control"                                          
 [496] "Fish and Wildlife/Coal Mining Regulation/Strip Mining/Black Lung"                                               
 [497] "Agriculture/Children (aid, infant mortality, etc.)"                                                             
 [498] "Tax rates/Airlines/Airports/Airline Industry"                                                                   
 [499] "Unemployment/Jobs/Public Health"                                                                                
 [500] "Education/Peace Movements/Pacifism/Anti-Military"                                                               
 [501] "Banking and Finance/Communists/Communism/Unamerican Activities"                                                 
 [502] "Pollution and Environmental Protection/Treaties"                                                                
 [503] "Civil Rights/Desegregation/Busing/Affirmative Action/Children (aid, infant mortality, etc.)"                    
 [504] "Women's Equality/Civil Rights/Desegregation/Busing/Affirmative Action"                                          
 [505] "Education/Civil Rights/Desegregation/Busing/Affirmative Action"                                                 
 [506] "Fish and Wildlife/Tax rates"                                                                                    
 [507] "Campaign Contributions, Lobbying/House Ethics Campaign Laws/Congressional Pay and Benefits"                     
 [508] "Welfare/Agriculture"                                                                                            
 [509] "Welfare/Children (aid, infant mortality, etc.)"                                                                 
 [510] "Vietnam War"                                                                                                    
 [511] "Pollution and Environmental Protection/Public Health"                                                           
 [512] "Human Rights/Religion"                                                                                          
 [513] "Agriculture/Military Pensions/Veterans Benefits"                                                                
 [514] "Voting Rights/Judiciary"                                                                                        
 [515] "Education/Voting Rights"                                                                                        
 [516] "Constitutional Amendments/Ratio of Representatives to Population"                                               
 [517] "Education/Judiciary"                                                                                            
 [518] "Welfare/Education"                                                                                              
 [519] "Unemployment/Jobs/Welfare"                                                                                      
 [520] "Pollution and Environmental Protection/Public Works"                                                            
 [521] "Selective Service (The Draft)/Vietnam War"                                                                      
 [522] "Tax rates/Radio/Television/Motion Pictures/Telecommunications"                                                  
 [523] "Welfare/Housing/Housing Programs/Rent Control"                                                                  
 [524] "Fish and Wildlife/Science and Technology"                                                                       
 [525] "Coal Mining Regulation/Strip Mining/Black Lung/Public Safety"                                                   
 [526] "Food Stamps/Food Programs/Children (aid, infant mortality, etc.)"                                               
 [527] "Union Regulation/Davis-Bacon/Situs Picketing/Airlines/Airports/Airline Industry"                                
 [528] "Tax rates/CIA/Spying/Intelligence"                                                                              
 [529] "Religion/Constitutional Amendments"                                                                             
 [530] "Welfare/Civil Rights/Desegregation/Busing/Affirmative Action"                                                   
 [531] "Unemployment/Jobs/Civil Rights/Desegregation/Busing/Affirmative Action"                                         
 [532] "Unemployment/Jobs/Communists/Communism/Unamerican Activities"                                                   
 [533] "Vietnam War/Treaties"                                                                                           
 [534] "Treaties/Space Exploration/NASA"                                                                                
 [535] "Treaties/Narcotics"                                                                                             
 [536] "Public Health/Handicapped"                                                                                      
 [537] "U. N./Vietnam War"                                                                                              
 [538] "Communists/Communism/Unamerican Activities/CIA/Spying/Intelligence"                                             
 [539] "Children (aid, infant mortality, etc.)/Social Security"                                                         
 [540] "Minimum Wage/Social Security"                                                                                   
 [541] "Civil Rights/Desegregation/Busing/Affirmative Action/Firearms"                                                  
 [542] "Tax rates/Communists/Communism/Unamerican Activities"                                                           
 [543] "Tax rates/Space Exploration/NASA"                                                                               
 [544] "Interstate Commerce/Anti-trust/Restraint of Commerce/Firearms"                                                  
 [545] "Vietnam War/Judiciary"                                                                                          
 [546] "Pollution and Environmental Protection/Airlines/Airports/Airline Industry"                                      
 [547] "Tax rates/Welfare"                                                                                              
 [548] "OSHA"                                                                                                           
 [549] "SST"                                                                                                            
 [550] "Arms Control/Nuclear Weapons"                                                                                   
 [551] "Treaties/Energy"                                                                                                
 [552] "Arms Control/Science and Technology"                                                                            
 [553] "Tax rates/Firearms"                                                                                             
 [554] "Treaties/Public Works"                                                                                          
 [555] "Housing/Housing Programs/Rent Control/Space Exploration/NASA"                                                   
 [556] "Tax rates/Public Health"                                                                                        
 [557] "SST/Airlines/Airports/Airline Industry"                                                                         
 [558] "Congressional Pay and Benefits/Debt Ceilings"                                                                   
 [559] "Pollution and Environmental Protection/Public Safety"                                                           
 [560] "Pollution and Environmental Protection/Education"                                                               
 [561] "Education/Narcotics"                                                                                            
 [562] "B-1 Bomber"                                                                                                     
 [563] "School Prayer/Constitutional Amendments"                                                                        
 [564] "Social Security/Debt Ceilings"                                                                                  
 [565] "Selective Service (The Draft)/Treaties"                                                                         
 [566] "Selective Service (The Draft)/Voting Rights"                                                                    
 [567] "Temperance and Liquor/Narcotics"                                                                                
 [568] "Selective Service (The Draft)/Public Health"                                                                    
 [569] "Agriculture/Consumer Protection Agency/Consumer Protection"                                                     
 [570] "Nuclear Power/Nuclear Weapons"                                                                                  
 [571] "South Africa/Rhodesia/Tariffs"                                                                                  
 [572] "Education/Banking and Finance"                                                                                  
 [573] "Campaign Contributions, Lobbying/House Ethics Campaign Laws/Radio/Television/Motion Pictures/Telecommunications"
 [574] "Women's Equality/Education"                                                                                     
 [575] "Pollution and Environmental Protection/Radio/Television/Motion Pictures/Telecommunications"                     
 [576] "U. N./Food Stamps/Food Programs"                                                                                
 [577] "Consumer Protection Agency/Consumer Protection/Public Safety"                                                   
 [578] "Banking and Finance/Humanitarian Assistance (foreign)"                                                          
 [579] "Debt Ceilings/CIA/Spying/Intelligence"                                                                          
 [580] "Food Stamps/Food Programs/Social Security"                                                                      
 [581] "Radio/Television/Motion Pictures/Telecommunications/Price Controls"                                             
 [582] "Minimum Wage/Radio/Television/Motion Pictures/Telecommunications"                                               
 [583] "Fish and Wildlife/Public Safety"                                                                                
 [584] "Unemployment/Jobs/Religion"                                                                                     
 [585] "Unemployment/Jobs/Radio/Television/Motion Pictures/Telecommunications"                                          
 [586] "Pollution and Environmental Protection/Nuclear Weapons"                                                         
 [587] "Nuclear Power/Treaties"                                                                                         
 [588] "Impeachments and Investigations/Supreme Court"                                                                  
 [589] "Science and Technology/Children (aid, infant mortality, etc.)"                                                  
 [590] "Consumer Protection Agency/Consumer Protection/Interstate Commerce/Anti-trust/Restraint of Commerce"            
 [591] "Radio/Television/Motion Pictures/Telecommunications/Debt Ceilings"                                              
 [592] "Unemployment/Jobs/OSHA"                                                                                         
 [593] "Welfare/Judiciary"                                                                                              
 [594] "Welfare/Consumer Protection Agency/Consumer Protection"                                                         
 [595] "Vietnam War/Humanitarian Assistance (foreign)"                                                                  
 [596] "Panama Canal/Minimum Wage"                                                                                      
 [597] "Civil Rights/Desegregation/Busing/Affirmative Action/Minimum Wage"                                              
 [598] "Vietnam War/Military Pensions/Veterans Benefits"                                                                
 [599] "Public Health/Narcotics"                                                                                        
 [600] "U.S. Currency/Judiciary"                                                                                        
 [601] "Unemployment/Jobs/Handicapped"                                                                                  
 [602] "Social Security/Handicapped"                                                                                    
 [603] "Military Pensions/Veterans Benefits/Social Security"                                                            
 [604] "Judiciary/Social Security"                                                                                      
 [605] "Pollution and Environmental Protection/SST"                                                                     
 [606] "Unemployment/Jobs/Debt Ceilings"                                                                                
 [607] "Abortion/Care of deformed newborns"                                                                             
 [608] "Workplace conditions/8 hour day/Price Controls"                                                                 
 [609] "Abortion/Care of deformed newborns/Public Health"                                                               
 [610] "Exchange Rates/U.S. Currency"                                                                                   
 [611] "Narcotics/Firearms"                                                                                             
 [612] "Budget resolution /U.S. Currency"                                                                               
 [613] "Pollution and Environmental Protection/Consumer Protection Agency/Consumer Protection"                          
 [614] "Military Pensions/Veterans Benefits/Space Exploration/NASA"                                                     
 [615] "Tariffs/Price Controls"                                                                                         
 [616] "Gasoline rationing/allocation/Energy"                                                                           
 [617] "Gasoline rationing/allocation/Pollution and Environmental Protection"                                           
 [618] "Gasoline rationing/allocation/Judiciary"                                                                        
 [619] "Consumer Protection Agency/Consumer Protection/Judiciary"                                                       
 [620] "Campaign Contributions, Lobbying/House Ethics Campaign Laws/Judiciary"                                          
 [621] "Food Stamps/Food Programs/Consumer Protection Agency/Consumer Protection"                                       
 [622] "Military Pensions/Veterans Benefits/Airlines/Airports/Airline Industry"                                         
 [623] "Abortion/Care of deformed newborns/Science and Technology"                                                      
 [624] "Science and Technology/Public Safety"                                                                           
 [625] "Public Safety/Radio/Television/Motion Pictures/Telecommunications"                                              
 [626] "Coal Mining Regulation/Strip Mining/Black Lung/Pollution and Environmental Protection"                          
 [627] "Gasoline rationing/allocation/Emergency Fuel Assistance"                                                        
 [628] "Gasoline rationing/allocation/Public Works"                                                                     
 [629] "Gasoline rationing/allocation/Airlines/Airports/Airline Industry"                                               
 [630] "Campaign Contributions, Lobbying/House Ethics Campaign Laws/Debt Ceilings"                                      
 [631] "Gasoline rationing/allocation/Social Security"                                                                  
 [632] "Science and Technology/Energy"                                                                                  
 [633] "Tax rates/Vietnam War"                                                                                          
 [634] "Budget resolution /Congressional Pay and Benefits"                                                              
 [635] "Union Regulation/Davis-Bacon/Situs Picketing/Campaign Contributions, Lobbying/House Ethics Campaign Laws"       
 [636] "Union Regulation/Davis-Bacon/Situs Picketing/Religion"                                                          
 [637] "Union Regulation/Davis-Bacon/Situs Picketing/OSHA"                                                              
 [638] "Union Regulation/Davis-Bacon/Situs Picketing/Food Stamps/Food Programs"                                         
 [639] "Banking and Finance/Debt Ceilings"                                                                              
 [640] "Union Regulation/Davis-Bacon/Situs Picketing/Consumer Protection Agency/Consumer Protection"                    
 [641] "Consumer Protection Agency/Consumer Protection/Radio/Television/Motion Pictures/Telecommunications"             
 [642] "OSHA/Workplace conditions/8 hour day"                                                                           
 [643] "Welfare/Workplace conditions/8 hour day"                                                                        
 [644] "Consumer Protection Agency/Consumer Protection/Public Health"                                                   
 [645] "Food Stamps/Food Programs/Welfare"                                                                              
 [646] "Education/Handicapped"                                                                                          
 [647] "Immigration/Naturalization/Communists/Communism/Unamerican Activities"                                          
 [648] "Breeder Reactor"                                                                                                
 [649] "School Prayer"                                                                                                  
 [650] "Coal Mining Regulation/Strip Mining/Black Lung/States Rights vs. Federal Government"                            
 [651] "Coal Mining Regulation/Strip Mining/Black Lung/Unemployment/Jobs"                                               
 [652] "Energy/Price Controls"                                                                                          
 [653] "Energy/Tax rates"                                                                                               
 [654] "Tax rates/Children (aid, infant mortality, etc.)"                                                               
 [655] "Energy/States Rights vs. Federal Government"                                                                    
 [656] "Social Security/Abortion/Care of deformed newborns"                                                             
 [657] "Energy/Civil Rights/Desegregation/Busing/Affirmative Action"                                                    
 [658] "Budget resolution /Tax rates"                                                                                   
 [659] "Consumer Protection Agency/Consumer Protection/Union Regulation/Davis-Bacon/Situs Picketing"                    
 [660] "Consumer Protection Agency/Consumer Protection/Firearms"                                                        
 [661] "Breeder Reactor/Nuclear Power"                                                                                  
 [662] "Nuclear Weapons/Arms Control"                                                                                   
 [663] "Education/Food Stamps/Food Programs"                                                                            
 [664] "Energy/Gasoline rationing/allocation"                                                                           
 [665] "Treaties/Shipping/Maritime"                                                                                     
 [666] "Treaties/Agriculture"                                                                                           
 [667] "Treaties/Arms Control"                                                                                          
 [668] "Union Regulation/Davis-Bacon/Situs Picketing/States Rights vs. Federal Government"                              
 [669] "Public Health/Science and Technology"                                                                           
 [670] "Judiciary/Nuclear Power"                                                                                        
 [671] "CIA/Spying/Intelligence/Communists/Communism/Unamerican Activities"                                             
 [672] "Women's Equality/Treaties"                                                                                      
 [673] "Coal Mining Regulation/Strip Mining/Black Lung/Parks and Conservation"                                          
 [674] "Communists/Communism/Unamerican Activities/Human Rights"                                                        
 [675] "Public Health/Children (aid, infant mortality, etc.)"                                                           
 [676] "Campaign Contributions, Lobbying/House Ethics Campaign Laws/Union Regulation/Davis-Bacon/Situs Picketing"       
 [677] "Campaign Contributions, Lobbying/House Ethics Campaign Laws/Supreme Court"                                      
 [678] "Social Security/Children (aid, infant mortality, etc.)"                                                         
 [679] "Food Stamps/Food Programs/Budget resolution "                                                                   
 [680] "Budget resolution /Agriculture"                                                                                 
 [681] "Abortion/Care of deformed newborns/Constitutional Amendments"                                                   
 [682] "Communists/Communism/Unamerican Activities/Arms Control"                                                        
 [683] "Social Security/Welfare"                                                                                        
 [684] "B-1 Bomber/Arms Control"                                                                                        
 [685] "Judiciary/States Rights vs. Federal Government"                                                                 
 [686] "Energy/Science and Technology"                                                                                  
 [687] "Nuclear Power/Breeder Reactor"                                                                                  
 [688] "Energy/Nuclear Power"                                                                                           
 [689] "Agriculture/OSHA"                                                                                               
 [690] "Energy/Pollution and Environmental Protection"                                                                  
 [691] "Pollution and Environmental Protection/States Rights vs. Federal Government"                                    
 [692] "Tax rates/Parks and Conservation"                                                                               
 [693] "Treaties/Parks and Conservation"                                                                                
 [694] "Treaties/Coal Mining Regulation/Strip Mining/Black Lung"                                                        
 [695] "Judiciary/Tax rates"                                                                                            
 [696] "Airlines/Airports/Airline Industry/Handicapped"                                                                 
 [697] "Homosexuality"                                                                                                  
 [698] "Neutron Bomb"                                                                                                   
 [699] "Impeachment of President/U. N."                                                                                 
 [700] "Communists/Communism/Unamerican Activities/Immigration/Naturalization"                                          
 [701] "Agriculture/South Africa/Rhodesia"                                                                              
 [702] "U. N./South Africa/Rhodesia"                                                                                    
 [703] "Congressional Pay and Benefits/Campaign Contributions, Lobbying/House Ethics Campaign Laws"                     
 [704] "Agriculture/Tax rates"                                                                                          
 [705] "Tax rates/Pollution and Environmental Protection"                                                               
 [706] "Housing/Housing Programs/Rent Control/Budget resolution "                                                       
 [707] "Military Pensions/Veterans Benefits/Budget resolution "                                                         
 [708] "Unemployment/Jobs/Budget resolution "                                                                           
 [709] "Fish and Wildlife/Budget resolution "                                                                           
 [710] "Agriculture/Food Stamps/Food Programs"                                                                          
 [711] "Coal Mining Regulation/Strip Mining/Black Lung/Energy"                                                          
 [712] "Human Rights/Communists/Communism/Unamerican Activities"                                                        
 [713] "Neutron Bomb/Arms Control"                                                                                      
 [714] "Science and Technology/Union Regulation/Davis-Bacon/Situs Picketing"                                            
 [715] "Nuclear Power/States Rights vs. Federal Government"                                                             
 [716] "Coal Mining Regulation/Strip Mining/Black Lung/Public Health"                                                   
 [717] "States Rights vs. Federal Government/Parks and Conservation"                                                    
 [718] "Parks and Conservation/Pollution and Environmental Protection"                                                  
 [719] "Interstate Commerce/Anti-trust/Restraint of Commerce/Coal Mining Regulation/Strip Mining/Black Lung"            
 [720] "Agriculture/Budget resolution "                                                                                 
 [721] "Military Pensions/Veterans Benefits/Vietnam War"                                                                
 [722] "Parks and Conservation/Emergency Fuel Assistance"                                                               
 [723] "Public Works/Parks and Conservation"                                                                            
 [724] "Energy/Education"                                                                                               
 [725] "Energy/Parks and Conservation"                                                                                  
 [726] "States Rights vs. Federal Government/Immigration/Naturalization"                                                
 [727] "Military Pensions/Veterans Benefits/Education"                                                                  
 [728] "Civil Rights/Desegregation/Busing/Affirmative Action/Energy"                                                    
 [729] "Energy/Tariffs"                                                                                                 
 [730] "Energy/Social Security"                                                                                         
 [731] "Social Security/Tax rates"                                                                                      
 [732] "Social Security/Military Pensions/Veterans Benefits"                                                            
 [733] "Social Security/Education"                                                                                      
 [734] "Nuclear Power/Pollution and Environmental Protection"                                                           
 [735] "Parks and Conservation/Fish and Wildlife"                                                                       
 [736] "Airlines/Airports/Airline Industry/Union Regulation/Davis-Bacon/Situs Picketing"                                
 [737] "Unemployment/Jobs/Food Stamps/Food Programs"                                                                    
 [738] "States Rights vs. Federal Government/Public Health"                                                             
 [739] "Space Exploration/NASA/Housing/Housing Programs/Rent Control"                                                   
 [740] "Abortion/Care of deformed newborns/States Rights vs. Federal Government"                                        
 [741] "Budget resolution /Debt Ceilings"                                                                               
 [742] "Abortion/Care of deformed newborns/Civil Rights/Desegregation/Busing/Affirmative Action"                        
 [743] "MX Missile"                                                                                                     
 [744] "Iran"                                                                                                           
 [745] "School Prayer/Supreme Court"                                                                                    
 [746] "Budget resolution /Food Stamps/Food Programs"                                                                   
 [747] "Narcotics/Central America"                                                                                      
 [748] "Budget resolution /Shipping/Maritime"                                                                           
 [749] "Handicapped/Housing/Housing Programs/Rent Control"                                                              
 [750] "Nuclear Power/Public Safety"                                                                                    
 [751] "Public Health/Education"                                                                                        
 [752] "Civil Rights/Desegregation/Busing/Affirmative Action/Unemployment/Jobs"                                         
 [753] "Immigration/Naturalization/States Rights vs. Federal Government"                                                
 [754] "Tax rates/Budget resolution "                                                                                   
 [755] "Treaties/Panama Canal"                                                                                          
 [756] "Central America/Communists/Communism/Unamerican Activities"                                                     
 [757] "Emergency Fuel Assistance/Energy"                                                                               
 [758] "Energy/Emergency Fuel Assistance"                                                                               
 [759] "Energy/Interstate Commerce/Anti-trust/Restraint of Commerce"                                                    
 [760] "Human Rights/Immigration/Naturalization"                                                                        
 [761] "Treaties/Pollution and Environmental Protection"                                                                
 [762] "Public Health/Military Pensions/Veterans Benefits"                                                              
 [763] "Public Health/Vietnam War"                                                                                      
 [764] "Children (aid, infant mortality, etc.)/Unemployment/Jobs"                                                       
 [765] "Budget resolution /Parks and Conservation"                                                                      
 [766] "Budget resolution /States Rights vs. Federal Government"                                                        
 [767] "Budget resolution /Public Health"                                                                               
 [768] "Energy/Debt Ceilings"                                                                                           
 [769] "Selective Service (The Draft)/Women's Equality"                                                                 
 [770] "Union Regulation/Davis-Bacon/Situs Picketing/Energy"                                                            
 [771] "Communists/Communism/Unamerican Activities/Central America"                                                     
 [772] "Airlines/Airports/Airline Industry/Tax rates"                                                                   
 [773] "Children (aid, infant mortality, etc.)/Education"                                                               
 [774] "Energy/Public Works"                                                                                            
 [775] "Food Stamps/Food Programs/Treaties"                                                                             
 [776] "Radio/Television/Motion Pictures/Telecommunications/Treaties"                                                   
 [777] "Communists/Communism/Unamerican Activities/Treaties"                                                            
 [778] "Price Controls/Interstate Commerce/Anti-trust/Restraint of Commerce"                                            
 [779] "Communists/Communism/Unamerican Activities/Agriculture"                                                         
 [780] "Price Controls/Agriculture"                                                                                     
 [781] "Public Health/Energy"                                                                                           
 [782] "Children (aid, infant mortality, etc.)/Food Stamps/Food Programs"                                               
 [783] "Food Stamps/Food Programs/Emergency Fuel Assistance"                                                            
 [784] "Food Stamps/Food Programs/Narcotics"                                                                            
 [785] "Food Stamps/Food Programs/Unemployment/Jobs"                                                                    
 [786] "Treaties/Nuclear Power"                                                                                         
 [787] "Treaties/Interstate Commerce/Anti-trust/Restraint of Commerce"                                                  
 [788] "Social Security/Energy"                                                                                         
 [789] "Social Security/Budget resolution "                                                                             
 [790] "Coal Mining Regulation/Strip Mining/Black Lung/Tax rates"                                                       
 [791] "Congressional Pay and Benefits/Tax rates"                                                                       
 [792] "Pollution and Environmental Protection/Nuclear Power"                                                           
 [793] "Budget resolution /Military Pensions/Veterans Benefits"                                                         
 [794] "OSHA/Coal Mining Regulation/Strip Mining/Black Lung"                                                            
 [795] "Voting Rights/Ratio of Representatives to Population"                                                           
 [796] "Airlines/Airports/Airline Industry/Energy"                                                                      
 [797] "Budget resolution /Social Security"                                                                             
 [798] "Budget resolution /Constitutional Amendments"                                                                   
 [799] "Handicapped/Education"                                                                                          
 [800] "School Prayer/Judiciary"                                                                                        
 [801] "Treaties/Education"                                                                                             
 [802] "States Rights vs. Federal Government/Unemployment/Jobs"                                                         
 [803] "Budget resolution /Unemployment/Jobs"                                                                           
 [804] "Budget resolution /Education"                                                                                   
 [805] "Interstate Commerce/Anti-trust/Restraint of Commerce/Public Works"                                              
 [806] "Parks and Conservation/Budget resolution "                                                                      
 [807] "Treaties/U. N."                                                                                                 
 [808] "Parks and Conservation/Energy"                                                                                  
 [809] "Constitutional Amendments/Supreme Court"                                                                        
 [810] "Radio/Television/Motion Pictures/Telecommunications/Communists/Communism/Unamerican Activities"                 
 [811] "Immigration/Naturalization/Central America"                                                                     
 [812] "Minorities (non-black)/Civil Rights/Desegregation/Busing/Affirmative Action"                                    
 [813] "Handicapped/Children (aid, infant mortality, etc.)"                                                             
 [814] "Energy/Treaties"                                                                                                
 [815] "Temperance and Liquor/Civil Rights/Desegregation/Busing/Affirmative Action"                                     
 [816] "Budget resolution /Banking and Finance"                                                                         
 [817] "Science and Technology/Arms Control"                                                                            
 [818] "Arms Control/Communists/Communism/Unamerican Activities"                                                        
 [819] "Communists/Communism/Unamerican Activities/Education"                                                           
 [820] "Welfare/Food Stamps/Food Programs"                                                                              
 [821] "CIA/Spying/Intelligence/Central America"                                                                        
 [822] "Humanitarian Assistance (foreign)/Central America"                                                              
 [823] "Judiciary/Handicapped"                                                                                          
 [824] "South Africa/Rhodesia/Banking and Finance"                                                                      
 [825] "South Africa/Rhodesia/Communists/Communism/Unamerican Activities"                                               
 [826] "Peace Movements/Pacifism/Anti-Military/Central America"                                                         
 [827] "Narcotics/CIA/Spying/Intelligence"                                                                              
 [828] "Public Works/Public Safety"                                                                                     
 [829] "Science and Technology/CIA/Spying/Intelligence"                                                                 
 [830] "Science and Technology/Nuclear Weapons"                                                                         
 [831] "Selective Service (The Draft)/Handicapped"                                                                      
 [832] "Alien and Sedition Laws/Narcotics"                                                                              
 [833] "Immigration/Naturalization/Housing/Housing Programs/Rent Control"                                               
 [834] "Banking and Finance/Agriculture"                                                                                
 [835] "Union Regulation/Davis-Bacon/Situs Picketing/Budget resolution "                                                
 [836] "Social Security/Immigration/Naturalization"                                                                     
 [837] "Welfare/Budget resolution "                                                                                     
 [838] "Central America/Humanitarian Assistance (foreign)"                                                              
 [839] "Abortion/Care of deformed newborns/Human Rights"                                                                
 [840] "South Africa/Rhodesia/Human Rights"                                                                             
 [841] "Judiciary/School Prayer"                                                                                        
 [842] "Communists/Communism/Unamerican Activities/Nuclear Weapons"                                                     
 [843] "Tax rates/Gasoline rationing/allocation"                                                                        
 [844] "Education/Unemployment/Jobs"                                                                                    
 [845] "Airlines/Airports/Airline Industry/Shipping/Maritime"                                                           
 [846] "Shipping/Maritime/Agriculture"                                                                                  
 [847] "Agriculture/Shipping/Maritime"                                                                                  
 [848] "Nuclear Weapons/Public Health"                                                                                  
 [849] "Food Stamps/Food Programs/Tax rates"                                                                            
 [850] "Nuclear Power/Communists/Communism/Unamerican Activities"                                                       
 [851] "Nuclear Power/Arms Control"                                                                                     
 [852] "Constitutional Amendments/States Rights vs. Federal Government"                                                 
 [853] "Constitutional Amendments/Social Security"                                                                      
 [854] "Airlines/Airports/Airline Industry/Education"                                                                   
 [855] "Budget resolution /Children (aid, infant mortality, etc.)"                                                      
 [856] "Budget resolution /Science and Technology"                                                                      
 [857] "Consumer Protection Agency/Consumer Protection/Pollution and Environmental Protection"                          
 [858] "Budget resolution /Energy"                                                                                      
 [859] "Debt Ceilings/Agriculture"                                                                                      
 [860] "Debt Ceilings/Judiciary"                                                                                        
 [861] "Central America/CIA/Spying/Intelligence"                                                                        
 [862] "South Africa/Rhodesia/Airlines/Airports/Airline Industry"                                                       
 [863] "South Africa/Rhodesia/States Rights vs. Federal Government"                                                     
 [864] "South Africa/Rhodesia/Humanitarian Assistance (foreign)"                                                        
 [865] "South Africa/Rhodesia/Unemployment/Jobs"                                                                        
 [866] "Interstate Commerce/Anti-trust/Restraint of Commerce/Union Regulation/Davis-Bacon/Situs Picketing"              
 [867] "Interstate Commerce/Anti-trust/Restraint of Commerce/South Africa/Rhodesia"                                     
 [868] "Central America/Human Rights"                                                                                   
 [869] "Treaties/Science and Technology"                                                                                
 [870] "Treaties/Children (aid, infant mortality, etc.)"                                                                
 [871] "Budget resolution /Immigration/Naturalization"                                                                  
 [872] "Impeachments and Investigations/Iran"                                                                           
 [873] "Food Stamps/Food Programs/Public Health"                                                                        
 [874] "Campaign Contributions, Lobbying/House Ethics Campaign Laws/Welfare"                                            
 [875] "CIA/Spying/Intelligence/Iran"                                                                                   
 [876] "Pollution and Environmental Protection/Science and Technology"                                                  
 [877] "Pollution and Environmental Protection/Energy"                                                                  
 [878] "Shipping/Maritime/Iran"                                                                                         
 [879] "Housing/Housing Programs/Rent Control/Agriculture"                                                              
 [880] "Fish and Wildlife/Education"                                                                                    
 [881] "Public Safety/Public Health"                                                                                    
 [882] "Public Health/CIA/Spying/Intelligence"                                                                          
 [883] "Banking and Finance/CIA/Spying/Intelligence"                                                                    
 [884] "Judiciary/CIA/Spying/Intelligence"                                                                              
 [885] "Interstate Commerce/Anti-trust/Restraint of Commerce/CIA/Spying/Intelligence"                                   
 [886] "Public Lands/Energy"                                                                                            
 [887] "Pollution and Environmental Protection/Space Exploration/NASA"                                                  
 [888] "Gasoline rationing/allocation/Science and Technology"                                                           
 [889] "Civil Service and Patronage/Judiciary"                                                                          
 [890] "Union Regulation/Davis-Bacon/Situs Picketing/Immigration/Naturalization"                                        
 [891] "Budget resolution /Narcotics"                                                                                   
 [892] "Military Pensions/Veterans Benefits/Nuclear Weapons"                                                            
 [893] "Firearms/Communists/Communism/Unamerican Activities"                                                            
 [894] "Narcotics/Energy"                                                                                               
 [895] "Civil Rights/Desegregation/Busing/Affirmative Action/Homosexuality"                                             
 [896] "Children (aid, infant mortality, etc.)/Science and Technology"                                                  
 [897] "Public Health/Radio/Television/Motion Pictures/Telecommunications"                                              
 [898] "Narcotics/Airlines/Airports/Airline Industry"                                                                   
 [899] "Judiciary/Housing/Housing Programs/Rent Control"                                                                
 [900] "Housing/Housing Programs/Rent Control/Handicapped"                                                              
 [901] "Women's Equality/Housing/Housing Programs/Rent Control"                                                         
 [902] "Public Lands/Public Health"                                                                                     
 [903] "Abortion/Care of deformed newborns/Welfare"                                                                     
 [904] "Military Pensions/Veterans Benefits/Judiciary"                                                                  
 [905] "Women's Equality/Banking and Finance"                                                                           
 [906] "Public Lands/Housing/Housing Programs/Rent Control"                                                             
 [907] "Iran/Impeachments and Investigations"                                                                           
 [908] "States Rights vs. Federal Government/Banking and Finance"                                                       
 [909] "Minimum Wage/Unemployment/Jobs"                                                                                 
 [910] "Public Health/Consumer Protection Agency/Consumer Protection"                                                   
 [911] "Campaign Contributions, Lobbying/House Ethics Campaign Laws/Budget resolution "                                 
 [912] "Human Rights/Central America"                                                                                   
 [913] "Housing/Housing Programs/Rent Control/Unemployment/Jobs"                                                        
 [914] "Energy/Shipping/Maritime"                                                                                       
 [915] "Debt Ceilings/Budget resolution "                                                                               
 [916] "Debt Ceilings/Congressional Pay and Benefits"                                                                   
 [917] "CIA/Spying/Intelligence/U. N."                                                                                  
 [918] "Communists/Communism/Unamerican Activities/Humanitarian Assistance (foreign)"                                   
 [919] "Immigration/Naturalization/Amnesty (all wars)"                                                                  
 [920] "Space Exploration/NASA/Military Pensions/Veterans Benefits"                                                     
 [921] "Judiciary/Impeachments and Investigations"                                                                      
 [922] "Public Health/Tax rates"                                                                                        
 [923] "Airlines/Airports/Airline Industry/Budget resolution "                                                          
 [924] "Airlines/Airports/Airline Industry/Narcotics"                                                                   
 [925] "Airlines/Airports/Airline Industry/Interstate Commerce/Anti-trust/Restraint of Commerce"                        
 [926] "Airlines/Airports/Airline Industry/Consumer Protection Agency/Consumer Protection"                              
 [927] "Public Health/Public Safety"                                                                                    
 [928] "Public Health/Nuclear Power"                                                                                    
 [929] "Civil Rights/Desegregation/Busing/Affirmative Action/Abortion/Care of deformed newborns"                        
 [930] "Civil Rights/Desegregation/Busing/Affirmative Action/Handicapped"                                               
 [931] "Union Regulation/Davis-Bacon/Situs Picketing/Treaties"                                                          
 [932] "Union Regulation/Davis-Bacon/Situs Picketing/Shipping/Maritime"                                                 
 [933] "Workplace conditions/8 hour day/States Rights vs. Federal Government"                                           
 [934] "Nuclear Power/Public Health"                                                                                    
 [935] "Pollution and Environmental Protection/Workplace conditions/8 hour day"                                         
 [936] "Budget resolution /Airlines/Airports/Airline Industry"                                                          
 [937] "Space Exploration/NASA/Arms Control"                                                                            
 [938] "Human Rights/Arms Control"                                                                                      
 [939] "Homosexuality/Religion"                                                                                         
 [940] "Space Exploration/NASA/Pollution and Environmental Protection"                                                  
 [941] "Housing/Housing Programs/Rent Control/Military Pensions/Veterans Benefits"                                      
 [942] "Pollution and Environmental Protection/Fish and Wildlife"                                                       
 [943] "Women's Equality/Civil Service and Patronage"                                                                   
 [944] "Banking and Finance/Civil Rights/Desegregation/Busing/Affirmative Action"                                       
 [945] "Housing/Housing Programs/Rent Control/Narcotics"                                                                
 [946] "Airlines/Airports/Airline Industry/Banking and Finance"                                                         
 [947] "Immigration/Naturalization/Human Rights"                                                                        
 [948] "Narcotics/Public Safety"                                                                                        
 [949] "Children (aid, infant mortality, etc.)/Religion"                                                                
 [950] "Tariffs/Communists/Communism/Unamerican Activities"                                                             
 [951] "Public Works/Nuclear Weapons"                                                                                   
 [952] "Parks and Conservation/Public Lands"                                                                            
 [953] "Human Rights/Tariffs"                                                                                           
 [954] "Debt Ceilings/Social Security"                                                                                  
 [955] "Nuclear Weapons/Peace Movements/Pacifism/Anti-Military"                                                         
 [956] "Tariffs/Human Rights"                                                                                           
 [957] "Fish and Wildlife/Public Health"                                                                                
 [958] "Minimum Wage/Union Regulation/Davis-Bacon/Situs Picketing"                                                      
 [959] "Central America/Panama Canal"                                                                                   
 [960] "Children (aid, infant mortality, etc.)/Welfare"                                                                 
 [961] "Immigration/Naturalization/Ratio of Representatives to Population"                                              
 [962] "Narcotics/Communists/Communism/Unamerican Activities"                                                           
 [963] "Abortion/Care of deformed newborns/Communists/Communism/Unamerican Activities"                                  
 [964] "Welfare/Narcotics"                                                                                              
 [965] "Central America/Narcotics"                                                                                      
 [966] "Minimum Wage/Agriculture"                                                                                       
 [967] "Pollution and Environmental Protection/South Africa/Rhodesia"                                                   
 [968] "Pollution and Environmental Protection/Coal Mining Regulation/Strip Mining/Black Lung"                          
 [969] "Handicapped/Airlines/Airports/Airline Industry"                                                                 
 [970] "Housing/Housing Programs/Rent Control/Public Health"                                                            
 [971] "Tariffs/Agriculture"                                                                                            
 [972] "Campaign Contributions, Lobbying/House Ethics Campaign Laws/Impeachments and Investigations"                    
 [973] "Campaign Contributions, Lobbying/House Ethics Campaign Laws/Children (aid, infant mortality, etc.)"             
 [974] "Narcotics/Children (aid, infant mortality, etc.)"                                                               
 [975] "Interstate Commerce/Anti-trust/Restraint of Commerce/Pollution and Environmental Protection"                    
 [976] "Abortion/Care of deformed newborns/U. N."                                                                       
 [977] "Civil Rights/Desegregation/Busing/Affirmative Action/Women's Equality"                                          
 [978] "CIA/Spying/Intelligence/Narcotics"                                                                              
 [979] "Union Regulation/Davis-Bacon/Situs Picketing/Unemployment/Jobs"                                                 
 [980] "Shipping/Maritime/Narcotics"                                                                                    
 [981] "Airlines/Airports/Airline Industry/Workplace conditions/8 hour day"                                             
 [982] "Space Exploration/NASA/Science and Technology"                                                                  
 [983] "Congressional Pay and Benefits/Constitutional Amendments"                                                       
 [984] "Agriculture/Pollution and Environmental Protection"                                                             
 [985] "Science and Technology/Public Works"                                                                            
 [986] "Parks and Conservation/Housing/Housing Programs/Rent Control"                                                   
 [987] "Pollution and Environmental Protection/Shipping/Maritime"                                                       
 [988] "OSHA/Public Health"                                                                                             
 [989] "Banking and Finance/Consumer Protection Agency/Consumer Protection"                                             
 [990] "Education/School Prayer"                                                                                        
 [991] "Education/Welfare"                                                                                              
 [992] "Radio/Television/Motion Pictures/Telecommunications/Children (aid, infant mortality, etc.)"                     
 [993] "Children (aid, infant mortality, etc.)/Tax rates"                                                               
 [994] "U. N./Pollution and Environmental Protection"                                                                   
 [995] "Shipping/Maritime/Budget resolution "                                                                           
 [996] "Voting Rights/Immigration/Naturalization"                                                                       
 [997] "Arms Control/Public Health"                                                                                     
 [998] "Campaign Contributions, Lobbying/House Ethics Campaign Laws/Tax rates"                                          
 [999] "Radio/Television/Motion Pictures/Telecommunications/Consumer Protection Agency/Consumer Protection"             
[1000] "Public Works/Nuclear Power"                                                                                     
 [ reached getOption("max.print") -- omitted 344 entries ]

> fulldf$clausen_codes %>% unique
[1] "Government Management"      "Miscellaneous Policy"       "Foreign and Defense Policy"
[4] "Civil Liberties"            "Social Welfare"             NA                          
[7] "Agriculture"               

> #this is all relevant bills in the sample
> tmp<-!is.na(fulldf$crs_policy_area) & 
+   fulldf$crs_policy_area=="Crime and Law Enforcement" 

> tmp<-tmp | (
+   !is.na(fulldf$issue_codes) & 
+     fulldf$issue_codes%in%c("Judiciary","Narcotics")
+ )

> tmp<-tmp | (
+   !is.na(fulldf$punitive)
+ )

> sum(tmp) 
[1] 3206

> fulldf$punitivevote<-tmp

> #########################################################
> #########################################################
> 
> #limit the data to the postwar period
> tmp<-year(fulldf$date)>=1945

> fulldf<-fulldf[tmp,]

> #########################################################
> #########################################################
> 
> #can we identify a woT sample
> tmp<-year(fulldf$date)>2001 & 
+   fulldf$punitivevote

> sum(fulldf$punitivevote)
[1] 2267

> #########################################################
> #########################################################
> 
> #save out
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> write.csv(
+   fulldf,
+   "02_voting_votesdf.csv",
+   row.names=F
+ )
[1] "######"
[1] "Running:"
[1] "02_matcheos.R"

> #########################################################
> #########################################################
> 
> #clear workspace
> rm(list=ls())

> #load packages
> require(stringr)

> require(plyr)

> require(dplyr)

> require(zoo)

> require(data.table)

> require(tidyr)

> require(rprojroot)

> #set dirs
> rootdir<-find_root(
+   criterion=has_file('_rapol.Rproj')
+ )

> codedir<-file.path(rootdir,"code")

> setwd(codedir); dir()
[1] "01_publicopinion" "02_voting"        "03_dind"          "dirs.R"          
[5] "runeverything.R" 

> source('dirs.R')

> #########################################################
> #########################################################
> 
> #HELPER FUNCTION
> 
> matchme<-function(vector,string) {
+   
+   #vector<-ssdf$firstname
+   #string<-thisfirstname
+   
+   ###make both of these lower
+   vector<-tolower(vector)
+   string<-tolower(string)
+   
+   ###remove apostrophe's et al
+   ###special characters
+   vector<-str_replace_all(
+     vector,
+     "(\\s|\\.|\\-|')",
+     ""
+   )
+   string<-str_replace_all(
+     string,
+     "(\\s|\\.|\\-|')",
+     ""
+   )
+   
+   ####
+   match<-str_detect(vector,fixed(string))
+   if(sum(match)==0) {
+     match<-str_detect(fixed(string),vector)
+     if(sum(match)==0)
+       match<-rep(F,length(vector))
+   }
+   return(match)
+ }

> #test
> #case 1: one candidate contains the target ("hello" inside "hello!")
> string<-"Hello"; vector<-c("Apple","World","Hello!")

> matchme(vector,string)
[1] FALSE FALSE  TRUE

> #case 2: no candidate contains the target, so the reverse test applies
> #(the candidate "hello" is found inside the target "hello!")
> string<-"Hello!"; vector<-c("Apple","World","Hello")

> matchme(vector,string)
[1] FALSE FALSE  TRUE

> #case 3: no match in either direction
> string<-"Yahoo!"; vector<-c("Apple","World","Hello")

> matchme(vector,string)
[1] FALSE FALSE FALSE

> #########################################################
> #########################################################
> 
> #read in data on all members
> #source: https://voteview.com/articles/data_help_members
> #datadir is defined outside the visible part of this script
> #(presumably by dirs.R - confirm); list the directory after switching to it
> setwd(datadir); dir()
 [1] "abcdfs.csv"                                 "All_Bills(1947-2017).csv"                  
 [3] "american_violence_data_20241016_201046.csv" "anesdf.csv"                                
 [5] "beodf.csv"                                  "beodf5.csv"                                
 [7] "billsdf_revised.csv"                        "cbcdf.csv"                                 
 [9] "cbsdfs.csv"                                 "citecount.csv"                             
[11] "congressmen_list.csv"                       "conventional.bib"                          
[13] "cpopdf.csv"                                 "Ethnic_Collect_Action.dta"                 
[15] "fulldf_bills.csv"                           "gallupdfs.csv"                             
[17] "gssdf.csv"                                  "Hall_votes.csv"                            
[19] "histpundf_national_220328.csv"              "housevotes_handcoded_revised.csv"          
[21] "HSall_members.csv"                          "HSall_rollcalls_withissues.csv"            
[23] "incrates_subnationalstate.csv"              "lbj1964.csv"                               
[25] "nbclatdfs.csv"                              "prezdf.csv"                                
[27] "questions_fortex.txt"                       "race_riot.dta"                             
[29] "redf.csv"                                   "roperdfs.csv"                              
[31] "tab_longviolence.csv"                       "timedfs.csv"                               
[33] "votes"                                     

> #load the members file from the current working directory as UTF-8
> membersdf <- fread(input = "HSall_members.csv", encoding = "UTF-8")

> #bioname contains one, two or three commas; count them for every row
> tmp <- str_count(string = membersdf$bioname, pattern = ",")

> #inspect the rows whose bioname has three commas
> membersdf[tmp == 3, ]
    congress chamber icpsr state_icpsr district_code state_abbrev party_code occupancy
       <int>  <char> <int>       <int>         <int>       <char>      <int>     <int>
 1:       97   House 14810          35             2           NE        200         0
 2:       98   House 14810          35             2           NE        200         0
 3:       98   House 15073          47             2           NC        100         0
 4:       99   House 14810          35             2           NE        200         0
 5:       99   House 15073          47             2           NC        100         0
 6:      100   House 14810          35             2           NE        200         0
 7:      100   House 15073          47             2           NC        100         0
 8:      101   House 15073          47             2           NC        100         0
 9:      102   House 15073          47             2           NC        100         0
10:      103   House 15073          47             2           NC        100         0
    last_means                                 bioname bioguide_id  born  died nominate_dim1
         <int>                                  <char>      <char> <num> <num>         <num>
 1:          1           DAUB, Harold John, Jr., (Hal)     D000065  1941    NA         0.391
 2:          1           DAUB, Harold John, Jr., (Hal)     D000065  1941    NA         0.391
 3:          1 VALENTINE, Itimous Thaddeus, Jr., (Tim)     V000006  1926  2015        -0.084
 4:          1           DAUB, Harold John, Jr., (Hal)     D000065  1941    NA         0.391
 5:          1 VALENTINE, Itimous Thaddeus, Jr., (Tim)     V000006  1926  2015        -0.084
 6:          1           DAUB, Harold John, Jr., (Hal)     D000065  1941    NA         0.391
 7:          1 VALENTINE, Itimous Thaddeus, Jr., (Tim)     V000006  1926  2015        -0.084
 8:          1 VALENTINE, Itimous Thaddeus, Jr., (Tim)     V000006  1926  2015        -0.084
 9:          1 VALENTINE, Itimous Thaddeus, Jr., (Tim)     V000006  1926  2015        -0.084
10:          1 VALENTINE, Itimous Thaddeus, Jr., (Tim)     V000006  1926  2015        -0.084
    nominate_dim2 nominate_log_likelihood nominate_geo_mean_probability
            <num>                   <num>                         <num>
 1:        -0.322               -233.2459                         0.707
 2:        -0.322               -253.7917                         0.722
 3:         0.465               -218.5494                         0.742
 4:        -0.322               -242.3700                         0.730
 5:         0.465               -204.3586                         0.763
 6:        -0.322               -188.4671                         0.752
 7:         0.465               -209.2576                         0.754
 8:         0.465               -249.8930                         0.711
 9:         0.465               -299.8685                         0.661
10:         0.465               -390.7593                         0.652
    nominate_number_of_votes nominate_number_of_errors conditional nokken_poole_dim1
                       <int>                     <int>      <lgcl>             <num>
 1:                      673                       100          NA             0.315
 2:                      778                       111          NA             0.375
 3:                      731                        97          NA            -0.086
 4:                      769                        97          NA             0.412
 5:                      754                        93          NA            -0.130
 6:                      662                        83          NA             0.415
 7:                      741                        92          NA            -0.082
 8:                      734                       127          NA            -0.066
 9:                      725                       145          NA            -0.046
10:                      915                       189          NA            -0.116
    nokken_poole_dim2
                <num>
 1:            -0.259
 2:            -0.179
 3:             0.539
 4:            -0.341
 5:             0.748
 6:            -0.312
 7:             0.614
 8:             0.306
 9:             0.484
10:             0.352

> #inspect the rows whose bioname has exactly two commas
> membersdf[tmp==2,]
      congress chamber icpsr state_icpsr district_code state_abbrev party_code occupancy
         <int>  <char> <int>       <int>         <int>       <char>      <int>     <int>
   1:        1   House  9489           1            98           CT       5000         0
   2:        1   House  5903          40             5           VA       4000         0
   3:        1  Senate  8845           5             0           RI       4000         0
   4:        2   House  9489           1            98           CT       5000         0
   5:        2   House  5903          40             5           VA       4000         0
  ---                                                                                   
2505:      116   House 14657          25             5           WI        200        NA
2506:      116  Senate 41501          44             0           GA        200        NA
2507:      116  Senate 41300           2             0           ME        328        NA
2508:      116  Senate 40703          14             0           PA        100        NA
2509:      116  Senate 40915          56             0           WV        100        NA
      last_means                         bioname bioguide_id  born  died nominate_dim1
           <int>                          <char>      <char> <num> <num>         <num>
   1:          1         TRUMBULL, Jonathan, Jr.     T000389  1740  1809         0.692
   2:          1             MADISON, James, Jr.     M000043  1751  1836        -0.061
   3:          3            STANTON, Joseph, Jr.     S000805  1739  1807        -0.091
   4:          1         TRUMBULL, Jonathan, Jr.     T000389  1740  1809         0.692
   5:          1             MADISON, James, Jr.     M000043  1751  1836        -0.061
  ---                                                                                 
2505:         NA SENSENBRENNER, Frank James, Jr.     S000244  1943    NA         0.643
2506:         NA       PERDUE, David Alfred, Jr.     P000612  1949    NA         0.586
2507:         NA        KING, Angus Stanley, Jr.     K000383  1944    NA        -0.154
2508:         NA        CASEY, Robert (Bob), Jr.     C001070  1960    NA        -0.304
2509:         NA               MANCHIN, Joe, III     M001183  1947    NA        -0.058
      nominate_dim2 nominate_log_likelihood nominate_geo_mean_probability
              <num>                   <num>                         <num>
   1:         0.246               -30.47113                       0.75000
   2:        -0.444               -28.81899                       0.75400
   3:         0.736               -22.70883                       0.68900
   4:         0.246                      NA                            NA
   5:        -0.444               -28.79201                       0.73100
  ---                                                                    
2505:        -0.603               -50.04543                       0.73141
2506:        -0.109                -7.67437                       0.93651
2507:        -0.234               -11.97690                       0.91263
2508:         0.194               -17.86328                       0.87253
2509:         0.458               -10.25240                       0.92303
      nominate_number_of_votes nominate_number_of_errors conditional nokken_poole_dim1
                         <int>                     <int>      <lgcl>             <num>
   1:                      106                        11          NA             0.749
   2:                      102                        13          NA             0.023
   3:                       61                        11          NA            -0.245
   4:                       NA                        NA          NA             0.975
   5:                       92                        13          NA            -0.129
  ---                                                                                 
2505:                      160                        26          NA             0.528
2506:                      117                         3          NA             0.432
2507:                      131                         4          NA            -0.176
2508:                      131                         9          NA            -0.312
2509:                      128                         0          NA            -0.045
      nokken_poole_dim2
                  <num>
   1:             0.166
   2:            -0.531
   3:             0.969
   4:             0.223
   5:            -0.075
  ---                  
2505:            -0.198
2506:             0.348
2507:            -0.244
2508:             0.309
2509:             0.378

> #inspect the rows whose bioname has exactly one comma
> membersdf[tmp==1,]
       congress   chamber icpsr state_icpsr district_code state_abbrev party_code occupancy
          <int>    <char> <int>       <int>         <int>       <char>      <int>     <int>
    1:        1 President 99869          99             0          USA       5000        NA
    2:        1     House  4766           1            98           CT       5000         0
    3:        1     House  8457           1            98           CT       5000         0
    4:        1     House  9062           1            98           CT       5000         0
    5:        1     House  9706           1            98           CT       5000         0
   ---                                                                                     
46830:      116    Senate 20146          56             0           WV        200        NA
46831:      116    Senate 29940          25             0           WI        100        NA
46832:      116    Senate 41111          25             0           WI        200        NA
46833:      116    Senate 40707          68             0           WY        200        NA
46834:      116    Senate 49706          68             0           WY        200        NA
       last_means               bioname bioguide_id  born  died nominate_dim1 nominate_dim2
            <int>                <char>      <char> <num> <num>         <num>         <num>
    1:         NA    WASHINGTON, George                NA    NA            NA            NA
    2:          1  HUNTINGTON, Benjamin     H000995  1736  1800         0.639         0.304
    3:          1        SHERMAN, Roger     S000349  1721  1793         0.589         0.307
    4:          1     STURGES, Jonathan     S001047  1740  1819         0.531         0.448
    5:          1   WADSWORTH, Jeremiah     W000013  1743  1804         0.738         0.206
   ---                                                                                     
46830:         NA CAPITO, Shelley Moore     C001047  1953    NA         0.261         0.054
46831:         NA        BALDWIN, Tammy     B001230  1962    NA        -0.518        -0.223
46832:         NA          JOHNSON, Ron     J000293  1955    NA         0.601        -0.303
46833:         NA     BARRASSO, John A.     B001261  1952    NA         0.538         0.233
46834:         NA      ENZI, Michael B.     E000285  1944    NA         0.540         0.189
       nominate_log_likelihood nominate_geo_mean_probability nominate_number_of_votes
                         <num>                         <num>                    <int>
    1:                      NA                            NA                       NA
    2:               -29.04670                       0.70800                       84
    3:               -40.59580                       0.68400                      107
    4:               -25.87361                       0.72400                       80
    5:               -16.56694                       0.82500                       86
   ---                                                                               
46830:                -5.79358                       0.95506                      126
46831:               -14.95555                       0.89133                      130
46832:               -12.69404                       0.90628                      129
46833:               -10.36398                       0.92393                      131
46834:               -11.21496                       0.91673                      129
       nominate_number_of_errors conditional nokken_poole_dim1 nokken_poole_dim2
                           <int>      <lgcl>             <num>             <num>
    1:                        NA          NA                NA                NA
    2:                        12          NA             0.649             0.229
    3:                        18          NA             0.614             0.298
    4:                        13          NA             0.573             0.529
    5:                         5          NA             0.770             0.146
   ---                                                                          
46830:                         0          NA             0.336             0.263
46831:                         9          NA            -0.421            -0.207
46832:                         6          NA             0.596            -0.020
46833:                         7          NA             0.669             0.213
46834:                         4          NA             0.459             0.824

> #split every bioname on commas: one character vector of pieces per row
> #(pieces after the first keep the space that followed the comma)
> members_names <- str_split(string = membersdf$bioname, pattern = ",")

> #lastnames: the piece before the first comma
> membersdf$lastname <- sapply(members_names, `[[`, 1)

> #firstnames: the piece after the first comma
> membersdf$firstname <- sapply(members_names, `[[`, 2)

> #suffixes
> #third comma-separated piece of bioname (e.g. " Jr."), NA when absent;
> #vapply() with a character NA keeps the column character in every case
> membersdf$suffix<-vapply(members_names,function(x) {
+   if(length(x)>=3) {
+     x[[3]]
+   } else {
+     NA_character_
+   }
+ },character(1))

> #nicknames
> #fourth comma-separated piece of bioname (e.g. " (Hal)"), NA when absent;
> #written to its own column so that the suffix column is not overwritten
> membersdf$nickname<-vapply(members_names,function(x) {
+   if(length(x)>=4) {
+     x[[4]]
+   } else {
+     NA_character_
+   }
+ },character(1))

> #########################################################
> #########################################################
> 
> #IDENTIFY RACE OF EO'S
> 
> #these are black congressmen
> #source: https://history.house.gov/Exhibitions-and-Publications/BAIC/Historical-Data/Black-American-Representatives-and-Senators-by-Congress/
> 
> #switch to the data directory and list its contents
> setwd(datadir); dir()
 [1] "abcdfs.csv"                                 "All_Bills(1947-2017).csv"                  
 [3] "american_violence_data_20241016_201046.csv" "anesdf.csv"                                
 [5] "beodf.csv"                                  "beodf5.csv"                                
 [7] "billsdf_revised.csv"                        "cbcdf.csv"                                 
 [9] "cbsdfs.csv"                                 "citecount.csv"                             
[11] "congressmen_list.csv"                       "conventional.bib"                          
[13] "cpopdf.csv"                                 "Ethnic_Collect_Action.dta"                 
[15] "fulldf_bills.csv"                           "gallupdfs.csv"                             
[17] "gssdf.csv"                                  "Hall_votes.csv"                            
[19] "histpundf_national_220328.csv"              "housevotes_handcoded_revised.csv"          
[21] "HSall_members.csv"                          "HSall_rollcalls_withissues.csv"            
[23] "incrates_subnationalstate.csv"              "lbj1964.csv"                               
[25] "nbclatdfs.csv"                              "prezdf.csv"                                
[27] "questions_fortex.txt"                       "race_riot.dta"                             
[29] "redf.csv"                                   "roperdfs.csv"                              
[31] "tab_longviolence.csv"                       "timedfs.csv"                               
[33] "votes"                                     

> #load the list of congressmen from the current working directory
> cbcdf <- fread(
+   input = "congressmen_list.csv",
+   stringsAsFactors = FALSE,
+   encoding = "UTF-8"
+ )

> #label the five columns by position
> names(cbcdf) <- c("congress", "raw", "state_alpha2", "party", "chamber")

> #trim: drop the rows whose name field is the empty string
> tmp <- cbcdf$raw == ""

> cbcdf <- cbcdf[!tmp, ]

> #keep an untouched copy of the name before cleaning it
> cbcdf$originalraw <- cbcdf$raw

> #remove all accents: each accented vowel becomes its plain letter;
> #a named vector (names = patterns, values = replacements) makes
> #str_replace_all() apply the replacements one after another
> cbcdf$raw <- str_replace_all(
+   cbcdf$raw,
+   setNames(
+     c("i", "a", "o", "u", "e"),
+     intToUtf8(c(0xED, 0xE1, 0xF3, 0xFA, 0xE9), multiple = TRUE)
+   )
+ )

> #get lastname and firstname from orig name
> cbcdf$raw
  [1] "LONG, Jefferson Franklin"                "RAINEY, Joseph Hayne"                   
  [3] "REVELS, Hiram Rhodes"                    "DE LARGE, Robert Carlos"                
  [5] "ELLIOTT, Robert Brown"                   "RAINEY, Joseph Hayne"                   
  [7] "TURNER, Benjamin Sterling"               "WALLS, Josiah Thomas"                   
  [9] "CAIN, Richard Harvey"                    "ELLIOTT, Robert Brown"                  
 [11] "LYNCH, John Roy"                         "RAINEY, Joseph Hayne"                   
 [13] "RANSIER, Alonzo Jacob"                   "RAPIER, James Thomas"                   
 [15] "WALLS, Josiah Thomas"                    "BRUCE, Blanche Kelso"                   
 [17] "HARALSON, Jeremiah"                      "HYMAN, John Adams"                      
 [19] "LYNCH, John Roy"                         "NASH, Charles Edmund"                   
 [21] "RAINEY, Joseph Hayne"                    "SMALLS, Robert"                         
 [23] "WALLS, Josiah Thomas"                    "BRUCE, Blanche Kelso"                   
 [25] "CAIN, Richard Harvey"                    "RAINEY, Joseph Hayne"                   
 [27] "SMALLS, Robert"                          "BRUCE, Blanche Kelso"                   
 [29] "LYNCH, John Roy"                         "SMALLS, Robert"                         
 [31] "O'HARA, James Edward"                    "SMALLS, Robert"                         
 [33] "O'HARA, James Edward"                    "SMALLS, Robert"                         
 [35] "CHEATHAM, Henry Plummer"                 "LANGSTON, John Mercer"                  
 [37] "MILLER, Thomas Ezekiel"                  "CHEATHAM, Henry Plummer"                
 [39] "MURRAY, George Washington"               "MURRAY, George Washington"              
 [41] "WHITE, George Henry"                     "WHITE, George Henry"                    
 [43] "DE PRIEST, Oscar Stanton"                "DE PRIEST, Oscar Stanton"               
 [45] "DE PRIEST, Oscar Stanton"                "MITCHELL, Arthur Wergs"                 
 [47] "MITCHELL, Arthur Wergs"                  "MITCHELL, Arthur Wergs"                 
 [49] "MITCHELL, Arthur Wergs"                  "DAWSON, William Levi"                   
 [51] "DAWSON, William Levi"                    "POWELL, Adam Clayton, Jr."              
 [53] "DAWSON, William Levi"                    "POWELL, Adam Clayton, Jr."              
 [55] "DAWSON, William Levi"                    "POWELL, Adam Clayton, Jr."              
 [57] "DAWSON, William Levi"                    "POWELL, Adam Clayton, Jr."              
 [59] "DAWSON, William Levi"                    "POWELL, Adam Clayton, Jr."              
 [61] "DAWSON, William Levi"                    "DIGGS, Charles Coles, Jr."              
 [63] "POWELL, Adam Clayton, Jr."               "DAWSON, William Levi"                   
 [65] "DIGGS, Charles Coles, Jr."               "NIX, Robert Nelson Cornelius, Sr."      
 [67] "POWELL, Adam Clayton, Jr."               "DAWSON, William Levi"                   
 [69] "DIGGS, Charles Coles, Jr."               "NIX, Robert Nelson Cornelius, Sr."      
 [71] "POWELL, Adam Clayton, Jr."               "DAWSON, William Levi"                   
 [73] "DIGGS, Charles Coles, Jr."               "NIX, Robert Nelson Cornelius, Sr."      
 [75] "POWELL, Adam Clayton, Jr."               "DAWSON, William Levi"                   
 [77] "DIGGS, Charles Coles, Jr."               "HAWKINS, Augustus Freeman (Gus)"        
 [79] "NIX, Robert Nelson Cornelius, Sr."       "POWELL, Adam Clayton, Jr."              
 [81] "CONYERS, John, Jr."                      "DAWSON, William Levi"                   
 [83] "DIGGS, Charles Coles, Jr."               "HAWKINS, Augustus Freeman (Gus)"        
 [85] "NIX, Robert Nelson Cornelius, Sr."       "POWELL, Adam Clayton, Jr."              
 [87] "BROOKE, Edward William, III"             "CONYERS, John, Jr."                     
 [89] "DAWSON, William Levi"                    "DIGGS, Charles Coles, Jr."              
 [91] "HAWKINS, Augustus Freeman (Gus)"         "NIX, Robert Nelson Cornelius, Sr."      
 [93] "POWELL, Adam Clayton, Jr. 1"             "BROOKE, Edward William, III"            
 [95] "CHISHOLM, Shirley Anita"                 "CLAY, William Lacy, Sr."                
 [97] "COLLINS, George Washington"              "CONYERS, John, Jr."                     
 [99] "DAWSON, William Levi"                    "DIGGS, Charles Coles, Jr."              
[101] "HAWKINS, Augustus Freeman (Gus)"         "NIX, Robert Nelson Cornelius, Sr."      
[103] "POWELL, Adam Clayton, Jr."               "STOKES, Louis"                          
[105] "BROOKE, Edward William, III"             "CHISHOLM, Shirley Anita"                
[107] "CLAY, William Lacy, Sr."                 "COLLINS, George Washington"             
[109] "CONYERS, John, Jr."                      "DELLUMS, Ronald V."                     
[111] "DIGGS, Charles Coles, Jr."               "FAUNTROY, Walter Edward"                
[113] "HAWKINS, Augustus Freeman (Gus)"         "METCALFE, Ralph Harold"                 
[115] "MITCHELL, Parren James"                  "NIX, Robert Nelson Cornelius, Sr."      
[117] "RANGEL, Charles B."                      "STOKES, Louis"                          
[119] "BROOKE, Edward William, III"             "BURKE, Yvonne Brathwaite"               
[121] "CHISHOLM, Shirley Anita"                 "CLAY, William Lacy, Sr."                
[123] "COLLINS, Cardiss"                        "CONYERS, John, Jr."                     
[125] "DELLUMS, Ronald V."                      "DIGGS, Charles Coles, Jr."              
[127] "FAUNTROY, Walter Edward"                 "HAWKINS, Augustus Freeman (Gus)"        
[129] "JORDAN, Barbara Charline"                "METCALFE, Ralph Harold"                 
[131] "MITCHELL, Parren James"                  "NIX, Robert Nelson Cornelius, Sr."      
[133] "RANGEL, Charles B."                      "STOKES, Louis"                          
[135] "YOUNG, Andrew Jackson, Jr."              "BROOKE, Edward William, III"            
[137] "BURKE, Yvonne Brathwaite"                "CHISHOLM, Shirley Anita"                
[139] "CLAY, William Lacy, Sr."                 "COLLINS, Cardiss"                       
[141] "CONYERS, John, Jr."                      "DELLUMS, Ronald V."                     
[143] "DIGGS, Charles Coles, Jr."               "FAUNTROY, Walter Edward"                
[145] "FORD, Harold Eugene"                     "HAWKINS, Augustus Freeman (Gus)"        
[147] "JORDAN, Barbara Charline"                "METCALFE, Ralph Harold"                 
[149] "MITCHELL, Parren James"                  "NIX, Robert Nelson Cornelius, Sr."      
[151] "RANGEL, Charles B."                      "STOKES, Louis"                          
[153] "YOUNG, Andrew Jackson, Jr."              "BROOKE, Edward William, III"            
[155] "BURKE, Yvonne Brathwaite"                "CHISHOLM, Shirley Anita"                
[157] "CLAY, William Lacy, Sr."                 "COLLINS, Cardiss"                       
[159] "CONYERS, John, Jr."                      "DELLUMS, Ronald V."                     
[161] "DIGGS, Charles Coles, Jr."               "FAUNTROY, Walter Edward"                
[163] "FORD, Harold Eugene"                     "HAWKINS, Augustus Freeman (Gus)"        
[165] "JORDAN, Barbara Charline"                "METCALFE, Ralph Harold"                 
[167] "MITCHELL, Parren James"                  "NIX, Robert Nelson Cornelius, Sr."      
[169] "RANGEL, Charles B."                      "STOKES, Louis"                          
[171] "YOUNG, Andrew Jackson, Jr."              "CHISHOLM, Shirley Anita"                
[173] "CLAY, William Lacy, Sr."                 "COLLINS, Cardiss"                       
[175] "CONYERS, John, Jr."                      "CROCKETT, George William, Jr."          
[177] "DELLUMS, Ronald V."                      "DIGGS, Charles Coles, Jr. 2"            
[179] "DIXON, Julian Carey"                     "EVANS, Melvin Herbert"                  
[181] "FAUNTROY, Walter Edward"                 "FORD, Harold Eugene"                    
[183] "GRAY, William Herbert, III"              "HAWKINS, Augustus Freeman (Gus)"        
[185] "LELAND, George Thomas (Mickey)"          "MITCHELL, Parren James"                 
[187] "RANGEL, Charles B."                      "STEWART, Bennett McVey"                 
[189] "STOKES, Louis"                           "CHISHOLM, Shirley Anita"                
[191] "CLAY, William Lacy, Sr."                 "COLLINS, Cardiss"                       
[193] "CONYERS, John, Jr."                      "CROCKETT, George William, Jr."          
[195] "DELLUMS, Ronald V."                      "DIXON, Julian Carey"                    
[197] "DYMALLY, Mervyn Malcolm"                 "FAUNTROY, Walter Edward"                
[199] "FORD, Harold Eugene"                     "GRAY, William Herbert, III"             
[201] "HALL, Katie Beatrice 3"                  "HAWKINS, Augustus Freeman (Gus)"        
[203] "LELAND, George Thomas (Mickey)"          "MITCHELL, Parren James"                 
[205] "RANGEL, Charles B."                      "SAVAGE, Gus"                            
[207] "STOKES, Louis"                           "WASHINGTON, Harold"                     
[209] "CLAY, William Lacy, Sr."                 "COLLINS, Cardiss"                       
[211] "CONYERS, John, Jr."                      "CROCKETT, George William, Jr."          
[213] "DELLUMS, Ronald V."                      "DIXON, Julian Carey"                    
[215] "DYMALLY, Mervyn Malcolm"                 "FAUNTROY, Walter Edward"                
[217] "FORD, Harold Eugene"                     "GRAY, William Herbert, III"             
[219] "HALL, Katie Beatrice"                    "HAWKINS, Augustus Freeman (Gus)"        
[221] "HAYES, Charles Arthur"                   "LELAND, George Thomas (Mickey)"         
[223] "MITCHELL, Parren James"                  "OWENS, Major Robert Odell"              
[225] "RANGEL, Charles B."                      "SAVAGE, Gus"                            
[227] "STOKES, Louis"                           "TOWNS, Edolphus"                        
[229] "WASHINGTON, Harold"                      "WHEAT, Alan Dupree"                     
[231] "CLAY, William Lacy, Sr."                 "COLLINS, Cardiss"                       
[233] "CONYERS, John, Jr."                      "CROCKETT, George William, Jr."          
[235] "DELLUMS, Ronald V."                      "DIXON, Julian Carey"                    
[237] "DYMALLY, Mervyn Malcolm"                 "FAUNTROY, Walter Edward"                
[239] "FORD, Harold Eugene"                     "GRAY, William Herbert, III"             
[241] "HAWKINS, Augustus Freeman (Gus)"         "HAYES, Charles Arthur"                  
[243] "LELAND, George Thomas (Mickey)"          "MITCHELL, Parren James"                 
[245] "OWENS, Major Robert Odell"               "RANGEL, Charles B."                     
[247] "SAVAGE, Gus"                             "STOKES, Louis"                          
[249] "TOWNS, Edolphus"                         "WALDON, Alton R., Jr."                  
[251] "WHEAT, Alan Dupree"                      "CLAY, William Lacy, Sr."                
[253] "COLLINS, Cardiss"                        "CONYERS, John, Jr."                     
[255] "CROCKETT, George William, Jr."           "DELLUMS, Ronald V."                     
[257] "DIXON, Julian Carey"                     "DYMALLY, Mervyn Malcolm"                
[259] "ESPY, Alphonso Michael (Mike)"           "FAUNTROY, Walter Edward"                
[261] "FLAKE, Floyd Harold"                     "FORD, Harold Eugene"                    
[263] "GRAY, William Herbert, III"              "HAWKINS, Augustus Freeman (Gus)"        
[265] "HAYES, Charles Arthur"                   "LELAND, George Thomas (Mickey)"         
[267] "LEWIS, John R."                          "MFUME, Kweisi"                          
[269] "OWENS, Major Robert Odell"               "RANGEL, Charles B."                     
[271] "SAVAGE, Gus"                             "STOKES, Louis"                          
[273] "TOWNS, Edolphus"                         "WHEAT, Alan Dupree"                     
[275] "CLAY, William Lacy, Sr."                 "COLLINS, Cardiss"                       
[277] "CONYERS, John, Jr."                      "CROCKETT, George William, Jr."          
[279] "DELLUMS, Ronald V."                      "DIXON, Julian Carey"                    
[281] "DYMALLY, Mervyn Malcolm"                 "ESPY, Alphonso Michael (Mike)"          
[283] "FAUNTROY, Walter Edward"                 "FLAKE, Floyd Harold"                    
[285] "FORD, Harold Eugene"                     "GRAY, William Herbert, III"             
[287] "HAWKINS, Augustus Freeman (Gus)"         "HAYES, Charles Arthur"                  
[289] "LELAND, George Thomas (Mickey)"          "LEWIS, John R."                         
[291] "MFUME, Kweisi"                           "OWENS, Major Robert Odell"              
[293] "PAYNE, Donald Milford"                   "RANGEL, Charles B."                     
[295] "SAVAGE, Gus"                             "STOKES, Louis"                          
[297] "TOWNS, Edolphus"                         "WASHINGTON, Craig Anthony"              
[299] "WHEAT, Alan Dupree"                      "BLACKWELL, Lucien Edward"               
[301] "CLAY, William Lacy, Sr."                 "CLAYTON, Eva M. 4"                      
[303] "COLLINS, Barbara-Rose"                   "COLLINS, Cardiss"                       
[305] "CONYERS, John, Jr."                      "DELLUMS, Ronald V."                     
[307] "DIXON, Julian Carey"                     "DYMALLY, Mervyn Malcolm"                
[309] "ESPY, Alphonso Michael (Mike)"           "FLAKE, Floyd Harold"                    
[311] "FORD, Harold Eugene"                     "FRANKS, Gary A."                        
[313] "GRAY, William Herbert, III 5"            "HAYES, Charles Arthur"                  
[315] "JEFFERSON, William Jennings"             "LEWIS, John R."                         
[317] "MFUME, Kweisi"                           "NORTON, Eleanor Holmes"                 
[319] "OWENS, Major Robert Odell"               "PAYNE, Donald Milford"                  
[321] "RANGEL, Charles B."                      "SAVAGE, Gus"                            
[323] "STOKES, Louis"                           "TOWNS, Edolphus"                        
[325] "WASHINGTON, Craig Anthony"               "WATERS, Maxine"                         
[327] "WHEAT, Alan Dupree"                      "BISHOP, Sanford Dixon, Jr."             
[329] "BLACKWELL, Lucien Edward"                "BROWN, Corrine"                         
[331] "CLAY, William Lacy, Sr."                 "CLAYTON, Eva M."                        
[333] "CLYBURN, James Enos"                     "COLLINS, Barbara-Rose"                  
[335] "COLLINS, Cardiss"                        "CONYERS, John, Jr."                     
[337] "DELLUMS, Ronald V."                      "DIXON, Julian Carey"                    
[339] "ESPY, Alphonso Michael (Mike)6"          "FIELDS, Cleo"                           
[341] "FLAKE, Floyd Harold"                     "FORD, Harold Eugene"                    
[343] "FRANKS, Gary A."                         "HASTINGS, Alcee Lamar"                  
[345] "HILLIARD, Earl Frederick"                "JEFFERSON, William Jennings"            
[347] "JOHNSON, Eddie Bernice"                  "LEWIS, John R."                         
[349] "MCKINNEY, Cynthia Ann"                   "MEEK, Carrie P."                        
[351] "MFUME, Kweisi"                           "MOSELEY BRAUN, Carol"                   
[353] "NORTON, Eleanor Holmes"                  "OWENS, Major Robert Odell"              
[355] "PAYNE, Donald Milford"                   "RANGEL, Charles B."                     
[357] "REYNOLDS, Mel"                           "RUSH, Bobby L."                         
[359] "SCOTT, Robert C."                        "STOKES, Louis"                          
[361] "THOMPSON, Bennie"                        "TOWNS, Edolphus"                        
[363] "TUCKER, Walter R., III"                  "WASHINGTON, Craig Anthony"              
[365] "WATERS, Maxine"                          "WATT, Melvin L."                        
[367] "WHEAT, Alan Dupree"                      "WYNN, Albert Russell"                   
[369] "BISHOP, Sanford Dixon, Jr."              "BROWN, Corrine"                         
[371] "CLAY, William Lacy, Sr."                 "CLAYTON, Eva M."                        
[373] "CLYBURN, James Enos"                     "COLLINS, Barbara-Rose"                  
[375] "COLLINS, Cardiss"                        "CONYERS, John, Jr."                     
[377] "CUMMINGS, Elijah Eugene"                 "DELLUMS, Ronald V."                     
[379] "DIXON, Julian Carey"                     "FATTAH, Chaka"                          
[381] "FIELDS, Cleo"                            "FLAKE, Floyd Harold"                    
[383] "FORD, Harold Eugene"                     "FRANKS, Gary A."                        
[385] "FRAZER, Victor O."                       "HASTINGS, Alcee Lamar"                  
[387] "HILLIARD, Earl Frederick"                "JACKSON, Jesse L., Jr."                 
[389] "JACKSON LEE, Sheila"                     "JEFFERSON, William Jennings"            
[391] "JOHNSON, Eddie Bernice"                  "LEWIS, John R."                         
[393] "MCKINNEY, Cynthia Ann"                   "MEEK, Carrie P."                        
[395] "MFUME, Kweisi 7"                         "MILLENDER-MCDONALD, Juanita 8"          
[397] "MOSELEY BRAUN, Carol"                    "NORTON, Eleanor Holmes"                 
[399] "OWENS, Major Robert Odell"               "PAYNE, Donald Milford"                  
[401] "RANGEL, Charles B."                      "REYNOLDS, Mel 9"                        
[403] "RUSH, Bobby L."                          "SCOTT, Robert C."                       
[405] "STOKES, Louis"                           "THOMPSON, Bennie"                       
[407] "TOWNS, Edolphus"                         "TUCKER, Walter R., III 10"              
[409] "WATERS, Maxine"                          "WATT, Melvin L."                        
[411] "WATTS, Julius Caesar, Jr. (J. C.)"       "WYNN, Albert Russell"                   
[413] "BISHOP, Sanford Dixon, Jr."              "BROWN, Corrine"                         
[415] "CARSON, Julia May"                       "CHRISTENSEN, Donna Marie 11"            
[417] "CLAY, William Lacy, Sr."                 "CLAYTON, Eva M."                        
[419] "CLYBURN, James Enos"                     "CONYERS, John, Jr."                     
[421] "CUMMINGS, Elijah Eugene"                 "DAVIS, Danny K."                        
[423] "DELLUMS, Ronald V. 12"                   "DIXON, Julian Carey"                    
[425] "FATTAH, Chaka"                           "FLAKE, Floyd Harold 13"                 
[427] "FORD, Harold, Jr."                       "HASTINGS, Alcee Lamar"                  
[429] "HILLIARD, Earl Frederick"                "JACKSON, Jesse L., Jr."                 
[431] "JACKSON LEE, Sheila"                     "JEFFERSON, William Jennings"            
[433] "JOHNSON, Eddie Bernice"                  "KILPATRICK, Carolyn Cheeks"             
[435] "LEE, Barbara 14"                         "LEWIS, John R."                         
[437] "MCKINNEY, Cynthia Ann"                   "MEEK, Carrie P."                        
[439] "MEEKS, Gregory W."                       "MILLENDER-MCDONALD, Juanita"            
[441] "MOSELEY BRAUN, Carol"                    "NORTON, Eleanor Holmes"                 
[443] "OWENS, Major Robert Odell"               "PAYNE, Donald Milford"                  
[445] "RANGEL, Charles B."                      "RUSH, Bobby L."                         
[447] "SCOTT, Robert C."                        "STOKES, Louis"                          
[449] "THOMPSON, Bennie"                        "TOWNS, Edolphus"                        
[451] "WATERS, Maxine"                          "WATT, Melvin L."                        
[453] "WATTS, Julius Caesar, Jr. (J. C.)"       "WYNN, Albert Russell"                   
[455] "BISHOP, Sanford Dixon, Jr."              "BROWN, Corrine"                         
[457] "CARSON, Julia May"                       "CHRISTENSEN, Donna Marie"               
[459] "CLAY, William Lacy, Sr."                 "CLAYTON, Eva M."                        
[461] "CLYBURN, James Enos"                     "CONYERS, John, Jr."                     
[463] "CUMMINGS, Elijah Eugene"                 "DAVIS, Danny K."                        
[465] "DIXON, Julian Carey"                     "FATTAH, Chaka"                          
[467] "FORD, Harold, Jr."                       "HASTINGS, Alcee Lamar"                  
[469] "HILLIARD, Earl Frederick"                "JACKSON, Jesse L., Jr."                 
[471] "JACKSON LEE, Sheila"                     "JEFFERSON, William Jennings"            
[473] "JOHNSON, Eddie Bernice"                  "JONES, Stephanie Tubbs"                 
[475] "KILPATRICK, Carolyn Cheeks"              "LEE, Barbara"                           
[477] "LEWIS, John R."                          "MCKINNEY, Cynthia Ann"                  
[479] "MEEK, Carrie P."                         "MEEKS, Gregory W."                      
[481] "MILLENDER-MCDONALD, Juanita"             "NORTON, Eleanor Holmes"                 
[483] "OWENS, Major Robert Odell"               "PAYNE, Donald Milford"                  
[485] "RANGEL, Charles B."                      "RUSH, Bobby L."                         
[487] "SCOTT, Robert C."                        "THOMPSON, Bennie"                       
[489] "TOWNS, Edolphus"                         "WATERS, Maxine"                         
[491] "WATT, Melvin L."                         "WATTS, Julius Caesar, Jr. (J. C.)"      
[493] "WYNN, Albert Russell"                    "BISHOP, Sanford Dixon, Jr."             
[495] "BROWN, Corrine"                          "CARSON, Julia May"                      
[497] "CHRISTENSEN, Donna Marie"                "CLAY, William Lacy, Jr."                
[499] "CLAYTON, Eva M."                         "CLYBURN, James Enos"                    
[501] "CONYERS, John, Jr."                      "CUMMINGS, Elijah Eugene"                
[503] "DAVIS, Danny K."                         "FATTAH, Chaka"                          
[505] "FORD, Harold, Jr."                       "HASTINGS, Alcee Lamar"                  
[507] "HILLIARD, Earl Frederick"                "JACKSON, Jesse L., Jr."                 
[509] "JACKSON LEE, Sheila"                     "JEFFERSON, William Jennings"            
[511] "JOHNSON, Eddie Bernice"                  "JONES, Stephanie Tubbs"                 
[513] "KILPATRICK, Carolyn Cheeks"              "LEE, Barbara"                           
[515] "LEWIS, John R."                          "MCKINNEY, Cynthia Ann"                  
[517] "MEEK, Carrie P."                         "MEEKS, Gregory W."                      
[519] "MILLENDER-MCDONALD, Juanita"             "NORTON, Eleanor Holmes"                 
[521] "OWENS, Major Robert Odell"               "PAYNE, Donald Milford"                  
[523] "RANGEL, Charles B."                      "RUSH, Bobby L."                         
[525] "SCOTT, Robert C."                        "THOMPSON, Bennie"                       
[527] "TOWNS, Edolphus"                         "WATERS, Maxine"                         
[529] "WATSON, Diane Edith 15"                  "WATT, Melvin L."                        
[531] "WATTS, Julius Caesar, Jr. (J. C.)"       "WYNN, Albert Russell"                   
[533] "BALLANCE, Frank W., Jr. 16"              "BISHOP, Sanford Dixon, Jr."             
[535] "BROWN, Corrine"                          "BUTTERFIELD, George Kenneth, Jr. (G.K.)"
[537] "CARSON, Julia May"                       "CHRISTENSEN, Donna Marie"               
[539] "CLAY, William Lacy, Jr."                 "CLYBURN, James Enos"                    
[541] "CONYERS, John, Jr."                      "CUMMINGS, Elijah Eugene"                
[543] "DAVIS, Artur"                            "DAVIS, Danny K."                        
[545] "FATTAH, Chaka"                           "FORD, Harold, Jr."                      
[547] "HASTINGS, Alcee Lamar"                   "JACKSON, Jesse L., Jr."                 
[549] "JACKSON LEE, Sheila"                     "JEFFERSON, William Jennings"            
[551] "JOHNSON, Eddie Bernice"                  "JONES, Stephanie Tubbs"                 
[553] "KILPATRICK, Carolyn Cheeks"              "LEE, Barbara"                           
[555] "LEWIS, John R."                          "MAJETTE, Denise L."                     
[557] "MEEK, Kendrick B."                       "MEEKS, Gregory W."                      
[559] "MILLENDER-MCDONALD, Juanita"             "NORTON, Eleanor Holmes"                 
[561] "OWENS, Major Robert Odell"               "PAYNE, Donald Milford"                  
[563] "RANGEL, Charles B."                      "RUSH, Bobby L."                         
[565] "SCOTT, David"                            "SCOTT, Robert C."                       
[567] "THOMPSON, Bennie"                        "TOWNS, Edolphus"                        
[569] "WATERS, Maxine"                          "WATSON, Diane Edith"                    
[571] "WATT, Melvin L."                         "WYNN, Albert Russell"                   
[573] "BISHOP, Sanford Dixon, Jr."              "BROWN, Corrine"                         
[575] "BUTTERFIELD, George Kenneth, Jr. (G.K.)" "CARSON, Julia May"                      
[577] "CHRISTENSEN, Donna Marie"                "CLAY, William Lacy, Jr."                
[579] "CLEAVER, Emanuel, II"                    "CLYBURN, James Enos"                    
[581] "CONYERS, John, Jr."                      "CUMMINGS, Elijah Eugene"                
[583] "DAVIS, Artur"                            "DAVIS, Danny K."                        
[585] "FATTAH, Chaka"                           "FORD, Harold, Jr."                      
[587] "GREEN, Al"                               "HASTINGS, Alcee Lamar"                  
[589] "JACKSON, Jesse L., Jr."                  "JACKSON LEE, Sheila"                    
[591] "JEFFERSON, William Jennings"             "JOHNSON, Eddie Bernice"                 
[593] "JONES, Stephanie Tubbs"                  "KILPATRICK, Carolyn Cheeks"             
[595] "LEE, Barbara"                            "LEWIS, John R."                         
[597] "MCKINNEY, Cynthia Ann"                   "MEEK, Kendrick B."                      
[599] "MEEKS, Gregory W."                       "MILLENDER-MCDONALD, Juanita"            
[601] "MOORE, Gwendolynne S. (Gwen)"            "NORTON, Eleanor Holmes"                 
[603] "OBAMA, Barack"                           "OWENS, Major Robert Odell"              
[605] "PAYNE, Donald Milford"                   "RANGEL, Charles B."                     
[607] "RUSH, Bobby L."                          "SCOTT, David"                           
[609] "SCOTT, Robert C."                        "THOMPSON, Bennie"                       
[611] "TOWNS, Edolphus"                         "WATERS, Maxine"                         
[613] "WATSON, Diane Edith"                     "WATT, Melvin L."                        
[615] "WYNN, Albert Russell"                    "BISHOP, Sanford Dixon, Jr."             
[617] "BROWN, Corrine"                          "BUTTERFIELD, George Kenneth, Jr. (G.K.)"
[619] "CARSON, Andre"                           "CARSON, Julia May 17"                   
[621] "CHRISTENSEN, Donna Marie"                "CLARKE, Yvette Diane"                   
[623] "CLAY, William Lacy, Jr."                 "CLEAVER, Emanuel, II"                   
[625] "CLYBURN, James Enos"                     "CONYERS, John, Jr."                     
[627] "CUMMINGS, Elijah Eugene"                 "DAVIS, Artur"                           
[629] "DAVIS, Danny K."                         "EDWARDS, Donna F. 18"                   
[631] "ELLISON, Keith"                          "FATTAH, Chaka"                          
[633] "FUDGE, Marcia L. 19"                     "GREEN, Al"                              
[635] "HASTINGS, Alcee Lamar"                   "JACKSON, Jesse L., Jr."                 
[637] "JACKSON LEE, Sheila"                     "JEFFERSON, William Jennings"            
[639] "JOHNSON, Eddie Bernice"                  "JOHNSON, Hank"                          
[641] "JONES, Stephanie Tubbs 20"               "KILPATRICK, Carolyn Cheeks"             
[643] "LEE, Barbara"                            "LEWIS, John R."                         
[645] "MEEK, Kendrick B."                       "MEEKS, Gregory W."                      
[647] "MILLENDER-MCDONALD, Juanita 21"          "MOORE, Gwendolynne S. (Gwen)"           
[649] "NORTON, Eleanor Holmes"                  "OBAMA, Barack 22"                       
[651] "PAYNE, Donald Milford"                   "RANGEL, Charles B."                     
[653] "RICHARDSON, Laura 23"                    "RUSH, Bobby L."                         
[655] "SCOTT, David"                            "SCOTT, Robert C."                       
[657] "THOMPSON, Bennie"                        "TOWNS, Edolphus"                        
[659] "WATERS, Maxine"                          "WATSON, Diane Edith"                    
[661] "WATT, Melvin L."                         "WYNN, Albert Russell 24"                
[663] "BISHOP, Sanford Dixon, Jr."              "BROWN, Corrine"                         
[665] "BURRIS, Roland 25"                       "BUTTERFIELD, George Kenneth, Jr. (G.K.)"
[667] "CARSON, Andre"                           "CHRISTENSEN, Donna Marie"               
[669] "CLARKE, Yvette Diane"                    "CLAY, William Lacy, Jr."                
[671] "CLEAVER, Emanuel, II"                    "CLYBURN, James Enos"                    
[673] "CONYERS, John, Jr."                      "CUMMINGS, Elijah Eugene"                
[675] "DAVIS, Artur"                            "DAVIS, Danny K."                        
[677] "EDWARDS, Donna F."                       "ELLISON, Keith"                         
[679] "FATTAH, Chaka"                           "FUDGE, Marcia L."                       
[681] "GREEN, Al"                               "HASTINGS, Alcee Lamar"                  
[683] "JACKSON, Jesse L., Jr."                  "JACKSON LEE, Sheila"                    
[685] "JOHNSON, Eddie Bernice"                  "JOHNSON, Hank"                          
[687] "KILPATRICK, Carolyn Cheeks"              "LEE, Barbara"                           
[689] "LEWIS, John R."                          "MEEK, Kendrick B."                      
[691] "MEEKS, Gregory W."                       "MOORE, Gwendolynne S. (Gwen)"           
[693] "NORTON, Eleanor Holmes"                  "PAYNE, Donald Milford"                  
[695] "RANGEL, Charles B."                      "RICHARDSON, Laura"                      
[697] "RUSH, Bobby L."                          "SCOTT, David"                           
[699] "SCOTT, Robert C."                        "THOMPSON, Bennie"                       
[701] "TOWNS, Edolphus"                         "WATERS, Maxine"                         
[703] "WATSON, Diane Edith"                     "WATT, Melvin L."                        
[705] "BASS, Karen"                             "BISHOP, Sanford Dixon, Jr."             
[707] "BROWN, Corrine"                          "BUTTERFIELD, George Kenneth, Jr. (G.K.)"
[709] "CARSON, Andre"                           "CHRISTENSEN, Donna Marie"               
[711] "CLARKE, Hansen"                          "CLARKE, Yvette Diane"                   
[713] "CLAY, William Lacy, Jr."                 "CLEAVER, Emanuel, II"                   
[715] "CLYBURN, James Enos"                     "CONYERS, John, Jr."                     
[717] "CUMMINGS, Elijah Eugene"                 "DAVIS, Danny K."                        
[719] "EDWARDS, Donna F."                       "ELLISON, Keith"                         
[721] "FATTAH, Chaka"                           "FUDGE, Marcia L."                       
[723] "GREEN, Al"                               "HASTINGS, Alcee Lamar"                  
[725] "JACKSON, Jesse L., Jr."                  "JACKSON LEE, Sheila"                    
[727] "JOHNSON, Eddie Bernice"                  "JOHNSON, Hank"                          
[729] "LEE, Barbara"                            "LEWIS, John R."                         
[731] "MEEKS, Gregory W."                       "MOORE, Gwendolynne S. (Gwen)"           
[733] "NORTON, Eleanor Holmes"                  "PAYNE, Donald Milford 26"               
[735] "PAYNE, Donald, Jr."                      "RANGEL, Charles B."                     
[737] "RICHARDSON, Laura"                       "RICHMOND, Cedric"                       
[739] "RUSH, Bobby L."                          "SCOTT, David"                           
[741] "SCOTT, Robert C."                        "SCOTT, Tim 27"                          
[743] "SEWELL, Terri"                           "THOMPSON, Bennie"                       
[745] "TOWNS, Edolphus"                         "WATERS, Maxine"                         
[747] "WATT, Melvin L."                         "WEST, Allen"                            
[749] "WILSON, Frederica"                       "ADAMS, Alma 28"                         
[751] "BASS, Karen"                             "BEATTY, Joyce"                          
[753] "BISHOP, Sanford Dixon, Jr."              "BOOKER, Cory Anthony"                   
[755] "BROWN, Corrine"                          "BUTTERFIELD, George Kenneth, Jr. (G.K.)"
[757] "CARSON, Andre"                           "CHRISTENSEN, Donna Marie"               
[759] "CLARKE, Yvette Diane"                    "CLAY, William Lacy, Jr."                
[761] "CLEAVER, Emanuel, II"                    "CLYBURN, James Enos"                    
[763] "CONYERS, John, Jr."                      "COWAN, William (Mo) 29"                 
[765] "CUMMINGS, Elijah Eugene"                 "DAVIS, Danny K."                        
[767] "EDWARDS, Donna F."                       "ELLISON, Keith"                         
[769] "FATTAH, Chaka"                           "FUDGE, Marcia L."                       
[771] "GREEN, Al"                               "HASTINGS, Alcee Lamar"                  
[773] "HORSFORD, Steven"                        "JACKSON LEE, Sheila"                    
[775] "JEFFRIES, Hakeem"                        "JOHNSON, Eddie Bernice"                 
[777] "JOHNSON, Hank"                           "KELLY, Robin L. 30"                     
[779] "LEE, Barbara"                            "LEWIS, John R."                         
[781] "MEEKS, Gregory W."                       "MOORE, Gwendolynne S. (Gwen)"           
[783] "NORTON, Eleanor Holmes"                  "PAYNE, Donald, Jr."                     
[785] "RANGEL, Charles B."                      "RICHMOND, Cedric"                       
[787] "RUSH, Bobby L."                          "SCOTT, David"                           
[789] "SCOTT, Robert C."                        "SCOTT, Tim"                             
[791] "SEWELL, Terri"                           "THOMPSON, Bennie"                       
[793] "VEASEY, Marc"                            "WATERS, Maxine"                         
[795] "WATT, Melvin L."                         "WILSON, Frederica"                      
[797] "ADAMS, Alma"                             "BASS, Karen"                            
[799] "BEATTY, Joyce"                           "BISHOP, Sanford Dixon, Jr."             
[801] "BOOKER, Cory Anthony"                    "BROWN, Corrine"                         
[803] "BUTTERFIELD, George Kenneth, Jr. (G.K.)" "CARSON, Andre"                          
[805] "CLARKE, Yvette Diane"                    "CLAY, William Lacy, Jr."                
[807] "CLEAVER, Emanuel, II"                    "CLYBURN, James Enos"                    
[809] "CONYERS, John, Jr."                      "CUMMINGS, Elijah Eugene"                
[811] "DAVIS, Danny K."                         "EDWARDS, Donna F."                      
[813] "ELLISON, Keith"                          "FATTAH, Chaka"                          
[815] "FUDGE, Marcia L."                        "GREEN, Al"                              
[817] "HASTINGS, Alcee Lamar"                   "HURD, William Ballard"                  
[819] "JACKSON LEE, Sheila"                     "JEFFRIES, Hakeem"                       
[821] "JOHNSON, Eddie Bernice"                  "JOHNSON, Hank"                          
[823] "KELLY, Robin L."                         "LAWRENCE, Brenda L"                     
[825] "LEE, Barbara"                            "LEWIS, John R."                         
[827] "LOVE, Ludmya Bourdeau (Mia)"             "MEEKS, Gregory W."                      
[829] "MOORE, Gwendolynne S. (Gwen)"            "NORTON, Eleanor Holmes"                 
[831] "PAYNE, Donald, Jr."                      "PLASKETT, Stacey M."                    
[833] "RANGEL, Charles B."                      "RICHMOND, Cedric"                       
[835] "RUSH, Bobby L."                          "SCOTT, David"                           
[837] "SCOTT, Robert C."                        "SCOTT, Tim"                             
[839] "SEWELL, Terri"                           "THOMPSON, Bennie"                       
[841] "VEASEY, Marc"                            "WATERS, Maxine"                         
[843] "WATSON COLEMAN, Bonnie"                  "WILSON, Frederica"                      

> tmpregex<-c(
+   "^([A-Z\\s\\'\\-]+)\\,\\s([A-z\\s\\.\\,\\-]+)(\\([A-z\\.\\s]+\\))?([0-9\\s]+)?$"
+ )

> tmp<-str_detect(
+   cbcdf$raw,
+   tmpregex
+ )

> cbcdf$raw[!tmp]
character(0)

> if(sum(!tmp)>0)
+   stop('havent matched all')

> #lastname
> cbcdf$lastname<-
+   str_replace(
+     cbcdf$raw,
+     tmpregex,
+     "\\1"
+   ) %>% str_replace(
+     "^\\s+|\\s+$",""
+   )

> #firstname
> cbcdf$firstname<-
+   str_replace(
+     cbcdf$raw,
+     tmpregex,
+     "\\2"
+   ) %>% str_replace(
+     "^\\s+|\\s+$",""
+   )

> #remove suffix from firstname
> fnameregex<-"^([A-z\\s\\.\\-]+)(\\,(.*))?$"

> tmp<-str_detect(cbcdf$firstname,fnameregex)

> if(sum(!tmp)>0)
+   stop('havent matched all')

> cbcdf$firstname<-str_replace(
+   cbcdf$firstname,
+   fnameregex,
+   "\\1"
+ ) %>% str_replace(
+   "^\\s+|\\s+$",""
+ )

> cbcdf$suffix<-str_replace(
+   cbcdf$firstname,
+   fnameregex,
+   "\\3"
+ ) %>% str_replace(
+   "^\\s+|\\s+$",""
+ )

> #get congress session info
> tmpregex<-"^([0-9]{2,3})(st|nd|rd|th)\\s\\(([0-9]{4})\\-([0-9]{4})\\)$"

> tmp<-str_detect(cbcdf$congress,tmpregex)

> if(sum(!tmp)>0)
+   stop('havent matched all')

> cbcdf$session<-str_replace(
+   cbcdf$congress,
+   tmpregex,
+   "\\1"
+ ) %>% as.numeric

> cbcdf$startyear<-str_replace(
+   cbcdf$congress,
+   tmpregex,
+   "\\3"
+ )

> cbcdf$endyear<-str_replace(
+   cbcdf$congress,
+   tmpregex,
+   "\\4"
+ )

> #MAKE PERSON-LEVEL
> #each row is a person-congress
> #we want it to be unique, person-level
> 
> #use FI/lastname/state_alpha2
> fi<-str_extract(cbcdf$firstname,"^[A-z]{1}")

> cbcdf$id<-paste0(
+   tolower(fi),
+   str_replace(
+     tolower(cbcdf$lastname),
+     "(\\s|\\.)",""
+   ),
+   "_",cbcdf$state_alpha2
+ )

> #inspect same ids, different raw
> tmpdf<-by(cbcdf,cbcdf$id,function(df) {
+   #df<-cbcdf[cbcdf$id=="wclay_MO",]
+   tmp<-length(unique(df$raw))
+   if(tmp>1) {
+     df
+   } else {
+     data.frame(
+       firstname=NA
+     )
+   }
+ }) %>% rbind.fill

> tmpdf<-tmpdf[!is.na(tmpdf$firstname),]

> tmpdf[,c("raw","id")]
                               raw                     id
1                   ADAMS, Alma 28              aadams_NC
2                      ADAMS, Alma              aadams_NC
5    ESPY, Alphonso Michael (Mike)               aespy_MS
6    ESPY, Alphonso Michael (Mike)               aespy_MS
7    ESPY, Alphonso Michael (Mike)               aespy_MS
8   ESPY, Alphonso Michael (Mike)6               aespy_MS
13       POWELL, Adam Clayton, Jr.             apowell_NY
14       POWELL, Adam Clayton, Jr.             apowell_NY
15       POWELL, Adam Clayton, Jr.             apowell_NY
16       POWELL, Adam Clayton, Jr.             apowell_NY
17       POWELL, Adam Clayton, Jr.             apowell_NY
18       POWELL, Adam Clayton, Jr.             apowell_NY
19       POWELL, Adam Clayton, Jr.             apowell_NY
20       POWELL, Adam Clayton, Jr.             apowell_NY
21       POWELL, Adam Clayton, Jr.             apowell_NY
22       POWELL, Adam Clayton, Jr.             apowell_NY
23       POWELL, Adam Clayton, Jr.             apowell_NY
24     POWELL, Adam Clayton, Jr. 1             apowell_NY
25       POWELL, Adam Clayton, Jr.             apowell_NY
30            WYNN, Albert Russell               awynn_MD
31            WYNN, Albert Russell               awynn_MD
32            WYNN, Albert Russell               awynn_MD
33            WYNN, Albert Russell               awynn_MD
34            WYNN, Albert Russell               awynn_MD
35            WYNN, Albert Russell               awynn_MD
36            WYNN, Albert Russell               awynn_MD
37         WYNN, Albert Russell 24               awynn_MD
43                 LEE, Barbara 14                blee_CA
44                    LEE, Barbara                blee_CA
45                    LEE, Barbara                blee_CA
46                    LEE, Barbara                blee_CA
47                    LEE, Barbara                blee_CA
48                    LEE, Barbara                blee_CA
49                    LEE, Barbara                blee_CA
50                    LEE, Barbara                blee_CA
51                    LEE, Barbara                blee_CA
52                    LEE, Barbara                blee_CA
53                   OBAMA, Barack              bobama_IL
54                OBAMA, Barack 22              bobama_IL
63       DIGGS, Charles Coles, Jr.              cdiggs_MI
64       DIGGS, Charles Coles, Jr.              cdiggs_MI
65       DIGGS, Charles Coles, Jr.              cdiggs_MI
66       DIGGS, Charles Coles, Jr.              cdiggs_MI
67       DIGGS, Charles Coles, Jr.              cdiggs_MI
68       DIGGS, Charles Coles, Jr.              cdiggs_MI
69       DIGGS, Charles Coles, Jr.              cdiggs_MI
70       DIGGS, Charles Coles, Jr.              cdiggs_MI
71       DIGGS, Charles Coles, Jr.              cdiggs_MI
72       DIGGS, Charles Coles, Jr.              cdiggs_MI
73       DIGGS, Charles Coles, Jr.              cdiggs_MI
74       DIGGS, Charles Coles, Jr.              cdiggs_MI
75     DIGGS, Charles Coles, Jr. 2              cdiggs_MI
87     CHRISTENSEN, Donna Marie 11        dchristensen_VI
88        CHRISTENSEN, Donna Marie        dchristensen_VI
89        CHRISTENSEN, Donna Marie        dchristensen_VI
90        CHRISTENSEN, Donna Marie        dchristensen_VI
91        CHRISTENSEN, Donna Marie        dchristensen_VI
92        CHRISTENSEN, Donna Marie        dchristensen_VI
93        CHRISTENSEN, Donna Marie        dchristensen_VI
94        CHRISTENSEN, Donna Marie        dchristensen_VI
95        CHRISTENSEN, Donna Marie        dchristensen_VI
97            EDWARDS, Donna F. 18            dedwards_MD
98               EDWARDS, Donna F.            dedwards_MD
99               EDWARDS, Donna F.            dedwards_MD
100              EDWARDS, Donna F.            dedwards_MD
101              EDWARDS, Donna F.            dedwards_MD
103          PAYNE, Donald Milford              dpayne_NJ
104          PAYNE, Donald Milford              dpayne_NJ
105          PAYNE, Donald Milford              dpayne_NJ
106          PAYNE, Donald Milford              dpayne_NJ
107          PAYNE, Donald Milford              dpayne_NJ
108          PAYNE, Donald Milford              dpayne_NJ
109          PAYNE, Donald Milford              dpayne_NJ
110          PAYNE, Donald Milford              dpayne_NJ
111          PAYNE, Donald Milford              dpayne_NJ
112          PAYNE, Donald Milford              dpayne_NJ
113          PAYNE, Donald Milford              dpayne_NJ
114       PAYNE, Donald Milford 26              dpayne_NJ
115             PAYNE, Donald, Jr.              dpayne_NJ
116             PAYNE, Donald, Jr.              dpayne_NJ
117             PAYNE, Donald, Jr.              dpayne_NJ
119         WATSON, Diane Edith 15             dwatson_CA
120            WATSON, Diane Edith             dwatson_CA
121            WATSON, Diane Edith             dwatson_CA
122            WATSON, Diane Edith             dwatson_CA
123            WATSON, Diane Edith             dwatson_CA
125              CLAYTON, Eva M. 4            eclayton_NC
126                CLAYTON, Eva M.            eclayton_NC
127                CLAYTON, Eva M.            eclayton_NC
128                CLAYTON, Eva M.            eclayton_NC
129                CLAYTON, Eva M.            eclayton_NC
130                CLAYTON, Eva M.            eclayton_NC
138            FLAKE, Floyd Harold              fflake_NY
139            FLAKE, Floyd Harold              fflake_NY
140            FLAKE, Floyd Harold              fflake_NY
141            FLAKE, Floyd Harold              fflake_NY
142            FLAKE, Floyd Harold              fflake_NY
143         FLAKE, Floyd Harold 13              fflake_NY
157            FORD, Harold Eugene               hford_TN
158            FORD, Harold Eugene               hford_TN
159            FORD, Harold Eugene               hford_TN
160            FORD, Harold Eugene               hford_TN
161            FORD, Harold Eugene               hford_TN
162            FORD, Harold Eugene               hford_TN
163            FORD, Harold Eugene               hford_TN
164            FORD, Harold Eugene               hford_TN
165            FORD, Harold Eugene               hford_TN
166            FORD, Harold Eugene               hford_TN
167            FORD, Harold Eugene               hford_TN
168              FORD, Harold, Jr.               hford_TN
169              FORD, Harold, Jr.               hford_TN
170              FORD, Harold, Jr.               hford_TN
171              FORD, Harold, Jr.               hford_TN
172              FORD, Harold, Jr.               hford_TN
178              CARSON, Julia May             jcarson_IN
179              CARSON, Julia May             jcarson_IN
180              CARSON, Julia May             jcarson_IN
181              CARSON, Julia May             jcarson_IN
182              CARSON, Julia May             jcarson_IN
183           CARSON, Julia May 17             jcarson_IN
194  MILLENDER-MCDONALD, Juanita 8 jmillender-mcdonald_CA
195    MILLENDER-MCDONALD, Juanita jmillender-mcdonald_CA
196    MILLENDER-MCDONALD, Juanita jmillender-mcdonald_CA
197    MILLENDER-MCDONALD, Juanita jmillender-mcdonald_CA
198    MILLENDER-MCDONALD, Juanita jmillender-mcdonald_CA
199    MILLENDER-MCDONALD, Juanita jmillender-mcdonald_CA
200 MILLENDER-MCDONALD, Juanita 21 jmillender-mcdonald_CA
208         HALL, Katie Beatrice 3               khall_IN
209           HALL, Katie Beatrice               khall_IN
211                  MFUME, Kweisi              kmfume_MD
212                  MFUME, Kweisi              kmfume_MD
213                  MFUME, Kweisi              kmfume_MD
214                  MFUME, Kweisi              kmfume_MD
215                MFUME, Kweisi 7              kmfume_MD
218           RICHARDSON, Laura 23         lrichardson_CA
219              RICHARDSON, Laura         lrichardson_CA
220              RICHARDSON, Laura         lrichardson_CA
224            FUDGE, Marcia L. 19              mfudge_OH
225               FUDGE, Marcia L.              mfudge_OH
226               FUDGE, Marcia L.              mfudge_OH
227               FUDGE, Marcia L.              mfudge_OH
228               FUDGE, Marcia L.              mfudge_OH
230                  REYNOLDS, Mel           mreynolds_IL
231                REYNOLDS, Mel 9           mreynolds_IL
240             DELLUMS, Ronald V.            rdellums_CA
241             DELLUMS, Ronald V.            rdellums_CA
242             DELLUMS, Ronald V.            rdellums_CA
243             DELLUMS, Ronald V.            rdellums_CA
244             DELLUMS, Ronald V.            rdellums_CA
245             DELLUMS, Ronald V.            rdellums_CA
246             DELLUMS, Ronald V.            rdellums_CA
247             DELLUMS, Ronald V.            rdellums_CA
248             DELLUMS, Ronald V.            rdellums_CA
249             DELLUMS, Ronald V.            rdellums_CA
250             DELLUMS, Ronald V.            rdellums_CA
251             DELLUMS, Ronald V.            rdellums_CA
252             DELLUMS, Ronald V.            rdellums_CA
253          DELLUMS, Ronald V. 12            rdellums_CA
255             KELLY, Robin L. 30              rkelly_IL
256                KELLY, Robin L.              rkelly_IL
265         JONES, Stephanie Tubbs              sjones_OH
266         JONES, Stephanie Tubbs              sjones_OH
267         JONES, Stephanie Tubbs              sjones_OH
268         JONES, Stephanie Tubbs              sjones_OH
269      JONES, Stephanie Tubbs 20              sjones_OH
272                  SCOTT, Tim 27              tscott_SC
273                     SCOTT, Tim              tscott_SC
274                     SCOTT, Tim              tscott_SC
277        CLAY, William Lacy, Sr.               wclay_MO
278        CLAY, William Lacy, Sr.               wclay_MO
279        CLAY, William Lacy, Sr.               wclay_MO
280        CLAY, William Lacy, Sr.               wclay_MO
281        CLAY, William Lacy, Sr.               wclay_MO
282        CLAY, William Lacy, Sr.               wclay_MO
283        CLAY, William Lacy, Sr.               wclay_MO
284        CLAY, William Lacy, Sr.               wclay_MO
285        CLAY, William Lacy, Sr.               wclay_MO
286        CLAY, William Lacy, Sr.               wclay_MO
287        CLAY, William Lacy, Sr.               wclay_MO
288        CLAY, William Lacy, Sr.               wclay_MO
289        CLAY, William Lacy, Sr.               wclay_MO
290        CLAY, William Lacy, Sr.               wclay_MO
291        CLAY, William Lacy, Sr.               wclay_MO
292        CLAY, William Lacy, Sr.               wclay_MO
293        CLAY, William Lacy, Jr.               wclay_MO
294        CLAY, William Lacy, Jr.               wclay_MO
295        CLAY, William Lacy, Jr.               wclay_MO
296        CLAY, William Lacy, Jr.               wclay_MO
297        CLAY, William Lacy, Jr.               wclay_MO
298        CLAY, William Lacy, Jr.               wclay_MO
299        CLAY, William Lacy, Jr.               wclay_MO
300        CLAY, William Lacy, Jr.               wclay_MO
304     GRAY, William Herbert, III               wgray_PA
305     GRAY, William Herbert, III               wgray_PA
306     GRAY, William Herbert, III               wgray_PA
307     GRAY, William Herbert, III               wgray_PA
308     GRAY, William Herbert, III               wgray_PA
309     GRAY, William Herbert, III               wgray_PA
310   GRAY, William Herbert, III 5               wgray_PA
313         TUCKER, Walter R., III             wtucker_CA
314      TUCKER, Walter R., III 10             wtucker_CA

> #fix the narcissist dups!
> tmp<-cbcdf$id=="dpayne_NJ" & 
+   str_detect(cbcdf$raw,"Jr")

> cbcdf$id[tmp]<-"dpaynejr_NJ"

> tmp<-cbcdf$id=="hford_TN" &
+   str_detect(cbcdf$raw,"Jr")

> cbcdf$id[tmp]<-"hfordjr_TN"

> tmp<-cbcdf$id=="wclay_MO" & 
+   str_detect(cbcdf$raw,"Jr")

> cbcdf$id[tmp]<-"wclayjr_MO"

> #now we get uniques
> cbcdf<-by(cbcdf,cbcdf$id,function(df) {
+   #df<-cbcdf[cbcdf$id=="tscott_SC",]
+   returnrow<-data.frame(
+     id=unique(df$id),
+     firstname=unique(df$firstname),
+     lastname=unique(df$lastname),
+     startyear=min(df$startyear),
+     endyear=max(df$endyear),
+     state_alpha2=unique(df$state_alpha2),
+     chamber=paste0(
+       unique(df$chamber),
+       collapse="/"
+     ),
+     stringsAsFactors=F
+   )
+   if(nrow(returnrow)>1)
+     stop(print(unique(df$id)))
+   returnrow
+ }) %>% rbind.fill

> tmp<-cbcdf$state_alpha2%in%membersdf$state_abbrev

> cbcdf<-cbcdf[tmp,]

> #loop through ssdf,
> #match everyone to someone
> tmpseq.i<-1:nrow(cbcdf);

> tmpoutput<-lapply(tmpseq.i,function(i) {
+   #i<-2
+   print(i)
+   
+   thisrow<-cbcdf[i,]
+   thisid<-thisrow$id
+   thisfname<-thisrow$firstname
+   thislname<-thisrow$lastname
+   thisa2<-thisrow$state_alpha2
+   
+   #match!
+   
+   #trim voters to thisa2
+   tmprows<-membersdf$state_abbrev==thisa2
+   tmpvars<-c("firstname","lastname","icpsr")
+   tmpdf<-membersdf[tmprows,tmpvars,with=F] %>%
+     unique
+   roworder<-order(tmpdf$lastname)
+   tmpdf<-tmpdf[roworder,]
+   
+   #take all the lastnames here,
+   #do we have a match?
+   lmatches<-matchme(
+     thislname,
+     tmpdf$lastname
+   )
+   
+   #take all the firstnames,
+   #do we have a match?
+   fmatches<-matchme(
+     thisfname,
+     tmpdf$firstname
+   )
+   
+   #ideally, for a match
+   #we want both
+   match<-lmatches & fmatches #both
+   pmatch<-lmatches | fmatches #either
+   
+   #browse
+   thisrow
+   tmpdf[pmatch,]
+ 
+   ####
+   #some matches done manually
+   if(thisid=="acarson_IN") 
+     match<-tmpdf$icpsr==20757
+   if(thisid=="aespy_MS")
+     match<-tmpdf$icpsr==15411
+   if(thisid=="dpayne_NJ")
+     match<-tmpdf$icpsr==15619
+   if(thisid=="hford_TN")
+     match<-tmpdf$icpsr==14224
+   if(thisid=="wclay_MO")
+     match<-tmpdf$icpsr==12009
+   if(thisid=="wclayjr_MO")
+     match<-tmpdf$icpsr==20147
+   if(thisid=="wcowan_MA")
+     match<-tmpdf$icpsr==41306
+   
+   ####
+   
+   
+   if(sum(match)==1) {
+     
+     matchdf<-data.frame(
+       row=i,
+       match="yes",
+       icpsr=tmpdf$icpsr[match],
+       firstname=thisfname,
+       lastname=thislname,
+       firstname.match=tmpdf$firstname[match],
+       lastname.match=tmpdf$lastname[match],
+       stringsAsFactors=F
+     )
+     
+   } else if(sum(match)>1) {
+     
+     matchdf<-data.frame(
+       row=i,
+       match="multi_exact",
+       icpsr=NA,
+       firstname=thisfname,
+       lastname=thislname,
+       firstname.match=paste0(
+         tmpdf$firstname[pmatch],
+         collapse=" / "
+       ),
+       lastname.match=paste0(
+         tmpdf$lastname[pmatch],
+         collapse=" / "
+       ),
+       stringsAsFactors=F
+     )
+     
+   } else if(sum(match)==0) {
+     
+     #partial match?
+     if(sum(pmatch)==1) {
+       
+       matchdf<-data.frame(
+         row=i,
+         match="yes_partial",
+         icpsr=tmpdf$icpsr[pmatch],
+         firstname=thisfname,
+         lastname=thislname,
+         firstname.match=tmpdf$firstname[pmatch],
+         lastname.match=tmpdf$lastname[pmatch],
+         stringsAsFactors=F
+       )
+       
+     } else if(sum(pmatch)>1) {
+       
+       matchdf<-data.frame(
+         row=i,
+         match="multi_partial",
+         icpsr=NA,
+         firstname=thisfname,
+         lastname=thislname,
+         firstname.match=paste0(
+           tmpdf$firstname[pmatch],
+           collapse=" / "
+         ),
+         lastname.match=paste0(
+           tmpdf$lastname[pmatch],
+           collapse=" / "
+         ),
+         stringsAsFactors=F
+       )
+       
+       
+     } else if(sum(pmatch)==0) {
+       
+       matchdf<-data.frame(
+         row=i,
+         match="no",
+         icpsr=NA,
+         firstname=thisfname,
+         lastname=thislname,
+         firstname.match=NA,
+         lastname.match=NA,
+         stringsAsFactors=F
+       )
+       
+     }
+     
+     
+   }
+   
+   
+   #return
+   matchdf
+   
+ }) 
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
[1] 11
[1] 12
[1] 13
[1] 14
[1] 15
[1] 16
[1] 17
[1] 18
[1] 19
[1] 20
[1] 21
[1] 22
[1] 23
[1] 24
[1] 25
[1] 26
[1] 27
[1] 28
[1] 29
[1] 30
[1] 31
[1] 32
[1] 33
[1] 34
[1] 35
[1] 36
[1] 37
[1] 38
[1] 39
[1] 40
[1] 41
[1] 42
[1] 43
[1] 44
[1] 45
[1] 46
[1] 47
[1] 48
[1] 49
[1] 50
[1] 51
[1] 52
[1] 53
[1] 54
[1] 55
[1] 56
[1] 57
[1] 58
[1] 59
[1] 60
[1] 61
[1] 62
[1] 63
[1] 64
[1] 65
[1] 66
[1] 67
[1] 68
[1] 69
[1] 70
[1] 71
[1] 72
[1] 73
[1] 74
[1] 75
[1] 76
[1] 77
[1] 78
[1] 79
[1] 80
[1] 81
[1] 82
[1] 83
[1] 84
[1] 85
[1] 86
[1] 87
[1] 88
[1] 89
[1] 90
[1] 91
[1] 92
[1] 93
[1] 94
[1] 95
[1] 96
[1] 97
[1] 98
[1] 99
[1] 100
[1] 101
[1] 102
[1] 103
[1] 104
[1] 105
[1] 106
[1] 107
[1] 108
[1] 109
[1] 110
[1] 111
[1] 112
[1] 113
[1] 114
[1] 115
[1] 116
[1] 117
[1] 118
[1] 119
[1] 120
[1] 121
[1] 122
[1] 123
[1] 124
[1] 125
[1] 126
[1] 127
[1] 128
[1] 129
[1] 130
[1] 131
[1] 132
[1] 133
[1] 134
[1] 135
[1] 136
[1] 137
[1] 138
[1] 139
[1] 140

> #ALL YES?
> matchdf<-rbind.fill(tmpoutput)

> tmp<-matchdf$match!="yes"

> if(sum(tmp)>0)
+   stop('not all matched')

> #########################################################
> #########################################################
> 
> #use this to identify who is black and who is not
> matchdf$black<-1

> tmpdf<-matchdf[,c("icpsr","black")]

> membersdf<-merge(
+   membersdf,
+   tmpdf,
+   all=T
+ )

> tmp<-is.na(membersdf$black)

> membersdf$black[tmp]<-0

> table(membersdf$black)

    0     1 
48463   890 

> #########################################################
> #########################################################
> 
> #DEPRECATED
> # #this shows where they are in space
> # membersdf$black<-factor(membersdf$black)
> # tmpcolors<-c("grey","black")
> # names(tmpcolors)<-levels(membersdf$black)
> # 
> # require(ggplot2)
> # ggplot(
> #   membersdf,
> #   aes(
> #     x=nominate_dim1,
> #     y=nominate_dim2,
> #     color=black
> #   )
> # ) +
> #   geom_point() +
> #   scale_color_manual(
> #     values=tmpcolors
> #   )
> 
> #########################################################
> #########################################################
> 
> 
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> write.csv(
+   membersdf,
+   '02_voting_membersdf.csv',
+   row.names=F
+ )
[1] "######"
[1] "Running:"
[1] "03_merge.R"

> #########################################################
> #########################################################
> 
> #clear workspace
> rm(list=ls())

> #load packages
> require(stringr)

> require(plyr)

> require(dplyr)

> require(zoo)

> require(data.table)

> require(tidyr)

> require(rprojroot)

> #set dirs
> rootdir<-find_root(
+   criterion=has_file('_rapol.Rproj')
+ )

> codedir<-file.path(rootdir,"code")

> setwd(codedir); dir()
[1] "01_publicopinion" "02_voting"        "03_dind"          "dirs.R"          
[5] "runeverything.R" 

> source('dirs.R')

> #########################################################
> #########################################################
> 
> #READ IN DATA
> 
> #read in a catalog of votes
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> allvotesdf<-fread(
+   '02_voting_votesdf.csv'
+ )

> nrow(unique(allvotesdf))==nrow(allvotesdf)
[1] TRUE

> #MEMBER-VOTES
> #read in member-vote database
> #this is also from voteview
> setwd(datadir); dir()
 [1] "abcdfs.csv"                                 "All_Bills(1947-2017).csv"                  
 [3] "american_violence_data_20241016_201046.csv" "anesdf.csv"                                
 [5] "beodf.csv"                                  "beodf5.csv"                                
 [7] "billsdf_revised.csv"                        "cbcdf.csv"                                 
 [9] "cbsdfs.csv"                                 "citecount.csv"                             
[11] "congressmen_list.csv"                       "conventional.bib"                          
[13] "cpopdf.csv"                                 "Ethnic_Collect_Action.dta"                 
[15] "fulldf_bills.csv"                           "gallupdfs.csv"                             
[17] "gssdf.csv"                                  "Hall_votes.csv"                            
[19] "histpundf_national_220328.csv"              "housevotes_handcoded_revised.csv"          
[21] "HSall_members.csv"                          "HSall_rollcalls_withissues.csv"            
[23] "incrates_subnationalstate.csv"              "lbj1964.csv"                               
[25] "nbclatdfs.csv"                              "prezdf.csv"                                
[27] "questions_fortex.txt"                       "race_riot.dta"                             
[29] "redf.csv"                                   "roperdfs.csv"                              
[31] "tab_longviolence.csv"                       "timedfs.csv"                               
[33] "votes"                                     

> mvotesdf<-fread(
+   'Hall_votes.csv'
+ )

> nrow(unique(mvotesdf))==nrow(mvotesdf)
[1] TRUE

> mvotesdf$congress_rollnumber<-paste0(
+   mvotesdf$congress,"-",mvotesdf$rollnumber
+ )

> #MEMBER INFO
> #read in data
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> membersdf<-fread(
+   '02_voting_membersdf.csv'
+ )

> nrow(unique(membersdf))==nrow(membersdf)
[1] TRUE

> #########################################################
> #########################################################
> 
> #PUT IT ALL TOGETHER
> 
> #restrict to punitive votes and those in the house
> tmp<-(allvotesdf$punitivevote==TRUE & 
+   allvotesdf$chamber=="house")

> allvotesdf<-allvotesdf[tmp,]

> allvotesdf$congress_rollnumber<-paste0(
+   allvotesdf$congress,"-",allvotesdf$rollnumber
+ )

> #we only need those votes in mvotes
> #which are coded in punvotes as punvotes
> tmp<-allvotesdf$congress_rollnumber%in%mvotesdf$congress_rollnumber

> if(sum(!tmp)!=0) stop() 

> #997 votes; all of them are in
> tmp<-mvotesdf$congress_rollnumber%in%allvotesdf$congress_rollnumber

> mvotesdf<-mvotesdf[tmp,]

> nrow(unique(mvotesdf))==nrow(mvotesdf)
[1] TRUE

> #merge the info we have into mvotes
> mergevars<-c(
+   "congress_rollnumber",
+   "date",
+   "punitive", #identifies whether this was hand-coded
+   "yea_count",
+   "nay_count",
+   "nominate_mid_1",
+   "nominate_mid_2",
+   "nominate_log_likelihood"
+ )

> mvotesdf<-merge(
+   mvotesdf,
+   allvotesdf[,mergevars,with=F],
+   by="congress_rollnumber"
+ )

> nrow(unique(mvotesdf))==nrow(mvotesdf)
[1] TRUE

> #merge member info into mvotes
> tmporder<-order(
+   membersdf$icpsr,
+   membersdf$congress
+ )

> membersdf<-membersdf[tmporder,]

> membersdf
       icpsr congress   chamber state_icpsr district_code state_abbrev party_code occupancy
       <int>    <int>    <char>       <int>         <int>       <char>      <int>     <int>
    1:     1       82     House          36            98           ND        200         0
    2:     2       80     House          40             4           VA        100         2
    3:     2       81     House          40             4           VA        100         0
    4:     2       82     House          40             4           VA        100         0
    5:     2       83     House          40             4           VA        100         0
   ---                                                                                     
49349: 99911      111 President          99             0          USA        100         0
49350: 99911      112 President          99             0          USA        100         0
49351: 99911      113 President          99             0          USA        100         0
49352: 99912      115 President          99             0          USA        200         0
49353: 99999       29     House          44             3           GA         29         5
       last_means                 bioname bioguide_id  born  died nominate_dim1 nominate_dim2
            <int>                  <char>      <char> <int> <int>         <num>         <num>
    1:          1    AANDAHL, Fred George     A000001  1897  1966         0.230         0.085
    2:          2 ABBITT, Watkins Moorman     A000002  1908  1998         0.090         0.972
    3:          1 ABBITT, Watkins Moorman     A000002  1908  1998         0.090         0.972
    4:          1 ABBITT, Watkins Moorman     A000002  1908  1998         0.090         0.972
    5:          1 ABBITT, Watkins Moorman     A000002  1908  1998         0.090         0.972
   ---                                                                                       
49349:          0           OBAMA, Barack     O000167  1961    NA        -0.354        -0.203
49350:          0           OBAMA, Barack     O000167  1961    NA        -0.354        -0.203
49351:          0           OBAMA, Barack     O000167  1961    NA        -0.354        -0.203
49352:          0      TRUMP, Donald John              1946    NA            NA            NA
49353:          1         POE, Washington                NA    NA            NA            NA
       nominate_log_likelihood nominate_geo_mean_probability nominate_number_of_votes
                         <num>                         <num>                    <int>
    1:               -34.20484                         0.733                      110
    2:               -13.25182                         0.724                       41
    3:               -79.85688                         0.669                      199
    4:               -60.32165                         0.640                      135
    5:               -45.48059                         0.669                      113
   ---                                                                               
49349:               -16.37060                         0.923                      203
49350:               -50.85915                         0.830                      273
49351:               -41.04464                         0.882                      327
49352:                      NA                            NA                       NA
49353:                      NA                            NA                       NA
       nominate_number_of_errors conditional nokken_poole_dim1 nokken_poole_dim2 lastname
                           <int>      <lgcl>             <num>             <num>   <char>
    1:                        11          NA             0.229             0.081  AANDAHL
    2:                         2          NA            -0.105             0.669   ABBITT
    3:                        41          NA            -0.084             0.881   ABBITT
    4:                        29          NA            -0.014             0.784   ABBITT
    5:                        21          NA             0.068             0.847   ABBITT
   ---                                                                                   
49349:                         6          NA                NA                NA    OBAMA
49350:                        21          NA                NA                NA    OBAMA
49351:                        11          NA                NA                NA    OBAMA
49352:                        NA          NA                NA                NA    TRUMP
49353:                        NA          NA                NA                NA      POE
              firstname suffix black
                 <char> <char> <int>
    1:      Fred George   <NA>     0
    2:  Watkins Moorman   <NA>     0
    3:  Watkins Moorman   <NA>     0
    4:  Watkins Moorman   <NA>     0
    5:  Watkins Moorman   <NA>     0
   ---                              
49349:           Barack   <NA>     0
49350:           Barack   <NA>     0
49351:           Barack   <NA>     0
49352:      Donald John   <NA>     0
49353:       Washington   <NA>     0

> mergevars<-c(
+   "congress",
+   "black",
+   "bioname",
+   "icpsr",
+   "state_abbrev",
+   "party_code",
+   "nominate_dim1",
+   "nominate_dim2"
+ )

> mvotesdf<-merge(
+   mvotesdf,
+   unique(membersdf[,mergevars,with=F]), #this has some dups
+   by=c("congress","icpsr")
+ )

> nrow(unique(mvotesdf))==nrow(mvotesdf)
[1] TRUE

> #########################################################
> #########################################################
> 
> #classify WoT votes
> tmp <- allvotesdf$date>lubridate::ymd('2001-09-11') & 
+   (
+     (str_detect(allvotesdf$vote_desc,'terror') & 
+        !is.na(allvotesdf$vote_desc))
+   ) 

> #there are only 8 of these, 
> #not likely to make a difference
> 
> #########################################################
> #########################################################
> 
> #OUTPUT
> 
> #write out
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> write.csv(
+   mvotesdf,
+   "02_voting_fulldf.csv",
+   row.names=F
+ )

> #########################################################
> #########################################################
> 
> # #output a random sample for credibility inspection
> # #stratifeid by decade
> # #urls only start before 1973, though
> # #and are absent for some votes
> # allvotesdf$decade<-floor(year(allvotesdf$date)/10) * 10
> # 
> # #take five from each decade
> # set.seed(23)
> # decades<-unique(allvotesdf$decade)
> # sampledf<-lapply(decades,function(thisdecade) {
> #   #thisdecade<-decades[1]
> #   tmp<-allvotesdf$decade==thisdecade &
> #     is.na(allvotesdf$punitive) #one that we haven't coded
> #   sample_n(allvotesdf[tmp,],ifelse(sum(tmp)>10,10,sum(tmp)))
> # }) %>% rbind.fill
> # 
> # #keep only the cols w/ info
> # sampledf<-sampledf[,sapply(sampledf,function(x) sum(!is.na(x)))>0]
> # setwd(outputdir)
> # write.csv(
> #   sampledf,
> #   'mvotesdf_sample.csv'
> # )
[1] "######"
[1] "Running:"
[1] "04_predict.R"

> #########################################################
> #########################################################
> 
> #clear workspace
> rm(list=ls())

> #load packages
> require(stringr)

> require(plyr)

> require(dplyr)

> require(zoo)

> require(data.table)

> require(tidyr)

> require(rprojroot)

> #set dirs
> rootdir<-find_root(
+   criterion=has_file('_rapol.Rproj')
+ )

> codedir<-file.path(rootdir,"code")

> setwd(codedir); dir()
[1] "01_publicopinion" "02_voting"        "03_dind"          "dirs.R"          
[5] "runeverything.R" 

> source('dirs.R')

> #########################################################
> #########################################################
> 
> #PREDICT DIRECTION OF PUNITIVE VOTING
> #we have a handful of hand-coded punitive votes
> #we want to use this to determine punitive votes
> #in all law and order votes ever conducted in the house
> 
> #this is a prediction problem
> #we will use DW NOMINATE probability information
> #as well as information about the party of president at the time
> #to guess which direction is punitive and which direction is not
> 
> #########################################################
> #########################################################
> 
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> mvotesdf<-fread(
+   '02_voting_fulldf.csv'
+ )

> #code cast codes
> mvotesdf$vote<-NA #voted no

> tmp<-mvotesdf$cast_code%in%c(
+   1,2,3
+ )

> mvotesdf$vote[tmp]<-1 #voted yes

> tmp<-mvotesdf$cast_code%in%c(
+   4,5,6, #negative
+   7,8,9 #present or absetntion
+ )

> mvotesdf$vote[tmp]<-0

> tmp<-mvotesdf$punitive=="YES"

> mvotesdf$punitive[tmp]<-1

> tmp<-mvotesdf$punitive=="NO"

> mvotesdf$punitive[tmp]<-0

> mvotesdf$punitive<-as.numeric(mvotesdf$punitive)

> #########################################################
> #########################################################
> 
> #let's use data we have to build prediction
> mvotesdf
        congress icpsr congress_rollnumber chamber rollnumber cast_code   prob       date
           <int> <int>              <char>  <char>      <int>     <int> <char>     <IDat>
     1:       79    13               79-74   House         74         7   83.4 1945-09-20
     2:       79    39               79-74   House         74         6   99.9 1945-09-20
     3:       79   100               79-74   House         74         1   97.2 1945-09-20
     4:       79   121               79-74   House         74         7   99.9 1945-09-20
     5:       79   158               79-74   House         74         6   99.3 1945-09-20
    ---                                                                                  
432834:      115 39308             115-655   House        655         1        2017-12-05
432835:      115 39308             115-661   House        661         6   99.9 2017-12-06
432836:      115 39308             115-672   House        672         1        2017-12-11
432837:      115 39308             115-744   House        744         1  100.0 2018-01-19
432838:      115 39308             115-752   House        752         1        2018-01-29
        punitive yea_count nay_count nominate_mid_1 nominate_mid_2 nominate_log_likelihood
           <num>     <int>     <int>          <num>          <num>                   <num>
     1:       NA       165       127         -0.109         -0.148                 -29.070
     2:       NA       165       127         -0.109         -0.148                 -29.070
     3:       NA       165       127         -0.109         -0.148                 -29.070
     4:       NA       165       127         -0.109         -0.148                 -29.070
     5:       NA       165       127         -0.109         -0.148                 -29.070
    ---                                                                                   
432834:       NA       409         0          0.000          0.000                   0.000
432835:       NA       190       236         -0.156          0.665                  -8.967
432836:       NA       402         2          0.000          0.000                   0.000
432837:       NA       241       183          0.240         -0.971                 -20.594
432838:       NA       406         3          0.000          0.000                   0.000
        black                   bioname state_abbrev party_code nominate_dim1 nominate_dim2
        <int>                    <char>       <char>      <int>         <num>         <num>
     1:     0 ABERNETHY, Thomas Gerstle           MS        100         0.052         0.999
     2:     0            ADAMS, Sherman           NH        200         0.180        -0.489
     3:     0        ALLEN, Asa Leonard           LA        100        -0.118         0.789
     4:     0         ALLEN, Leo Elwood           IL        200         0.384        -0.091
     5:     0     ANDERSEN, Herman Carl           MN        200         0.294         0.107
    ---                                                                                    
432834:     0 GOODLATTE, Robert William           VA        200         0.496        -0.002
432835:     0 GOODLATTE, Robert William           VA        200         0.496        -0.002
432836:     0 GOODLATTE, Robert William           VA        200         0.496        -0.002
432837:     0 GOODLATTE, Robert William           VA        200         0.496        -0.002
432838:     0 GOODLATTE, Robert William           VA        200         0.496        -0.002
         vote
        <num>
     1:     0
     2:     0
     3:     1
     4:     0
     5:     0
    ---      
432834:     1
432835:     0
432836:     1
432837:     1
432838:     1

> #for each vote we coded, 
> #we want the nominate scores
> #in each dimension
> #of those who voted yes
> #and of those who voted otherwise
> avgsdf<-mvotesdf[
+   !is.na(punitive) & #bills we've hand-coded 
+     !is.na(vote)
+   ,
+   .(
+     punitive=unique(punitive),
+     nominate_dim1_avg=mean(nominate_dim1),
+     nominate_dim2_avg=mean(nominate_dim2)
+   )
+   ,
+   by=c("congress_rollnumber","vote")
+   ]

> avgsdf<-avgsdf[order(avgsdf$congress_rollnumber)]

> #a vote of 1 on a punitive bill is a punitive vote
> #a vote of 0 on a non-punitive bill is also a punitive vote
> avgsdf$class<-0

> tmp<-(avgsdf$punitive==1 & 
+         avgsdf$vote==1) | 
+   (avgsdf$punitive==0 & 
+      avgsdf$vote==0)

> avgsdf$class[tmp]<-1

> avgsdf
    congress_rollnumber  vote punitive nominate_dim1_avg nominate_dim2_avg class
                 <char> <num>    <num>             <num>             <num> <num>
 1:             100-807     1        1      -0.006506631      -0.035673740     1
 2:             100-807     0        1      -0.290818182      -0.152927273     0
 3:             100-939     1        1      -0.028667622      -0.052432665     1
 4:             100-939     0        1      -0.101722892      -0.042903614     0
 5:             101-336     1        1      -0.038973753      -0.063057743     1
 6:             101-336     0        1      -0.086509434      -0.007018868     0
 7:             101-877     1        1      -0.033907643      -0.072066879     1
 8:             101-877     0        1      -0.078016949      -0.018406780     0
 9:            103-1005     1        1      -0.239258475      -0.115258475     1
10:            103-1005     0        1       0.180455000       0.059280000     0
11:             103-700     1        0      -0.417339623      -0.135396226     0
12:             103-700     0        0       0.071669725      -0.006155963     1
13:             103-701     0        0       0.076335385       0.008307692     1
14:             103-701     1        0      -0.422324074      -0.176527778     0
15:             103-702     1        1       0.059275964       0.008029674     1
16:             103-702     0        1      -0.424770833      -0.198656250     0
17:             103-703     0        1      -0.353033816      -0.049318841     0
18:             103-703     1        1       0.231309735      -0.027238938     1
19:             103-705     0        0       0.090698758       0.007229814     1
20:             103-705     1        0      -0.450513514      -0.168405405     0
21:             103-713     0        1      -0.401760736      -0.077515337     0
22:             103-713     1        1       0.165500000      -0.013814815     1
23:             103-714     1        0      -0.404928994      -0.037988166     0
24:             103-714     0        0       0.180420455      -0.037670455     1
25:             103-716     0        1      -0.415401515      -0.138393939     0
26:             103-716     1        1       0.113059801       0.006322259     1
27:             103-718     1        1       0.009297872      -0.011747340     1
28:             103-718     0        1      -0.426280702      -0.209614035     0
29:             103-719     0        1      -0.332679825      -0.031833333     0
30:             103-719     1        1       0.268531707      -0.044424390     1
31:             103-720     1        0      -0.354919431       0.015876777     0
32:             103-720     0        0       0.243630631      -0.088806306     1
33:             103-724     1        1      -0.020570352      -0.024608040     1
34:             103-724     0        1      -0.360428571      -0.187742857     0
35:             103-725     0        1      -0.349504505      -0.050409910     0
36:             103-725     1        1       0.266198113      -0.023500000     1
37:             103-730     0        1      -0.394909836      -0.144721311     0
38:             103-730     1        1       0.088028939       0.004151125     1
39:             103-731     1        0      -0.348178344      -0.083197452     0
40:             103-731     0        0       0.122688406      -0.011967391     1
41:             103-738     1        1      -0.186965035      -0.009097902     1
42:             103-738     0        1       0.218351351      -0.091695946     0
43:             103-983     1        1      -0.332056872      -0.062265403     1
44:             103-983     0        1       0.220844444      -0.009808889     0
45:             104-104     1        1       0.254148148       0.006521886     1
46:             104-104     0        1      -0.425855072      -0.119239130     0
47:             104-112     0        1      -0.322635294      -0.089005882     0
48:             104-112     1        1       0.270045283       0.002313208     1
49:            104-1321     1        1       0.036601078      -0.031064690     1
50:            104-1321     0        1       0.093671875      -0.104843750     0
51:             104-275     1        1       0.047507177      -0.033576555     1
52:             104-275     0        1      -0.184941176      -0.028411765     0
53:             104-955     0        1      -0.403880000      -0.101050000     0
54:             104-955     1        1       0.184975976      -0.021870871     1
55:             104-992     1        1       0.133993174      -0.013655290     1
56:             104-992     0        1      -0.131127660      -0.096078014     0
57:             106-491     1        1       0.023964539      -0.004205674     1
58:             106-491     0        1       0.041416667      -0.186083333     0
59:             107-394     0        1      -0.371342105      -0.116421053     0
60:             107-394     1        1       0.109220670       0.024237430     1
61:             107-616     0        1      -0.377656863      -0.141000000     0
62:             107-616     1        1       0.150422156       0.044209581     1
63:             108-705     0        1      -0.330544444      -0.114283333     0
64:             108-705     1        1       0.302392157       0.095552941     1
65:             109-167     0        1      -0.298367742      -0.117593548     0
66:             109-167     1        1       0.233369176       0.086193548     1
67:            110-1076     1        0      -0.082953890       0.034190202     0
68:            110-1076     0        0       0.369988372       0.039104651     1
69:             114-243     0        1      -0.221583333      -0.179250000     0
70:             114-243     1        1       0.109104762      -0.006700000     1
71:              90-107     1        1      -0.071166667       0.043348958     1
72:              90-107     0        1      -0.015367347       0.545183673     0
73:              90-335     0        0       0.001869081       0.143309192     1
74:              90-335     1        0      -0.384373333      -0.118800000     0
75:              91-284     1        1      -0.077293948       0.086354467     1
76:              91-284     0        1      -0.012361446       0.098204819     0
77:              91-300     1        1       0.010904070       0.156267442     1
78:              91-300     0        1      -0.342056818      -0.170818182     0
79:              91-355     1        0      -0.047729651       0.082267442     0
80:              91-355     0        0      -0.114372093       0.120244186     1
81:              91-367     0        1      -0.183643678       0.036873563     0
82:              91-367     1        1      -0.029965015       0.103303207     1
83:              93-117     1        0      -0.076562176       0.052686528     0
84:              93-117     0        0       0.022584906       0.135471698     1
85:              93-793     0        0       0.025056604       0.208424528     1
86:              93-793     1        0      -0.098060790       0.021656535     0
87:              98-868     1        1       0.147786290      -0.019895161     1
88:              98-868     0        1      -0.338632432      -0.053967568     0
89:              98-869     1        1      -0.103139241       0.028183544     1
90:              98-869     0        1       0.056376068      -0.203623932     0
91:              99-787     1        1      -0.022782278      -0.035205063     1
92:              99-787     0        1      -0.208918919      -0.164189189     0
    congress_rollnumber  vote punitive nominate_dim1_avg nominate_dim2_avg class

> #we want to reshape this, 
> #so that each row is a vote
> avgsdf<-gather(
+   avgsdf,
+   var,
+   val,
+   nominate_dim1_avg:nominate_dim2_avg
+ )

> avgsdf$var<-paste0(
+   avgsdf$var,
+   "_",
+   avgsdf$vote
+ )

> avgsdf$class<-avgsdf$vote<-NULL

> avgsdf<-spread(
+   avgsdf,
+   var,
+   val
+ )

> #########################################################
> #########################################################
> 
> #using information about the avg nominate scores
> #in each dimension, of those voting punitive
> #and those voting not punitive
> 
> #we will see how we do predicting others
> #let's do this N times, and see which model is best
> #by looking at the average accuracy across splits
> 
> set.seed(23)

> tmpdf<-lapply(1:10,function(i) {
+   #i<-1
+   ind = sample(2, nrow(avgsdf), replace=TRUE, prob=c(0.7,0.3))
+   trainData = avgsdf[ind==1,]
+   testData = avgsdf[ind==2,]
+   trainData$congress_rollnumber<-NULL
+   
+   #train a simple logistic regression
+   m.logit<-glm(
+     data=trainData,
+     formula=punitive ~ .,
+     family='binomial'
+   )
+   predictions<-predict(
+     m.logit,
+     newdata=testData,
+     type='response'
+   )
+   tab_logit_20<-table(testData$punitive,as.numeric(predictions>0.2))
+   tab_logit_50<-table(testData$punitive,as.numeric(predictions>0.5))
+   tab_logit_80<-table(testData$punitive,as.numeric(predictions>0.8))
+   
+   #train a simple decision tree
+   require(rpart)
+   require(rpart.plot)
+   m.rpart<-rpart(
+     data=trainData,
+     punitive ~ .,
+     method = 'class'
+   )
+   #rpart.plot(m.rpart, extra = 106)
+   predict_punitive<-predict(m.rpart,testData,type = 'class')
+   tab_rpart<-table(testData$punitive,predict_punitive)
+   
+   #estimate accuracy of the various measures
+   tab_list<-list(
+     logit_20=tab_logit_20,
+     logit_50=tab_logit_50,
+     logit_80=tab_logit_80,
+     tab_rpart=tab_rpart
+   )
+   sapply(tab_list,function(tab) {
+     sum(diag(tab)) / sum(tab)
+   }) %>% t %>% as.data.frame
+   
+ }) %>% rbind.fill
glm.fit: algorithm did not converge 
glm.fit: fitted probabilities numerically 0 or 1 occurred 
Loading required package: rpart
 
Loading required package: rpart.plot
 
glm.fit: fitted probabilities numerically 0 or 1 occurred 
glm.fit: algorithm did not converge 
glm.fit: fitted probabilities numerically 0 or 1 occurred 
glm.fit: algorithm did not converge 
glm.fit: fitted probabilities numerically 0 or 1 occurred 

> lapply(tmpdf,mean)
$logit_20
[1] 0.7368767

$logit_50
[1] 0.7908449

$logit_80
[1] 0.7817179

$tab_rpart
[1] 0.6857677


> lapply(tmpdf,mean)
$logit_20
[1] 0.7368767

$logit_50
[1] 0.7908449

$logit_80
[1] 0.7817179

$tab_rpart
[1] 0.6857677


> #on average, logit 50 seems pretty good
> #so we proceed w/ the simplest approach 
> 
> #given the lack of data, 
> #our pref model will be a logit model fit to all the data
> regdf<-avgsdf

> regdf$congress_rollnumber<-NULL

> m.logit<-glm(
+   data=regdf,
+   formula=punitive ~ .,
+   family='binomial'
+ )

> summary(m.logit)

Call:
glm(formula = punitive ~ ., family = "binomial", data = regdf)

Coefficients:
                    Estimate Std. Error z value Pr(>|z|)  
(Intercept)            3.205      1.430   2.241   0.0250 *
nominate_dim1_avg_0   -1.956      3.816  -0.513   0.6082  
nominate_dim1_avg_1   16.713      7.957   2.100   0.0357 *
nominate_dim2_avg_0   -3.808      3.436  -1.108   0.2678  
nominate_dim2_avg_1  -16.120     12.892  -1.250   0.2112  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 50.607  on 45  degrees of freedom
Residual deviance: 20.845  on 41  degrees of freedom
AIC: 30.845

Number of Fisher Scoring iterations: 8


> #########################################################
> #########################################################
> 
> #classify everything using a simple logit
> classdf<-mvotesdf[
+   is.na(punitive) & #bills we haven't hand coded
+     !is.na(vote)
+   ,
+   .(
+     #na.rm necessary b/c some members are missing scores
+     nominate_dim1_avg=mean(nominate_dim1,na.rm=T),
+     nominate_dim2_avg=mean(nominate_dim2,na.rm=T)
+   )
+   ,
+   by=c("congress_rollnumber","vote")
+   ]

> classdf<-gather(
+   classdf,
+   var,
+   val,
+   nominate_dim1_avg:nominate_dim2_avg
+ )

> classdf$var<-paste0(
+   classdf$var,
+   "_",
+   classdf$vote
+ )

> classdf$vote<-NULL

> classdf<-spread(
+   classdf,
+   var,
+   val
+ )

> #classify!
> predict_punitive_classdf<-predict(m.logit,newdata=classdf,type='response')

> classdf$punitive<-as.numeric(predict_punitive_classdf>0.5)

> table(classdf$punitive,useNA='a') 

   0    1 <NA> 
 241  711    0 

> #merge this back in
> mvotesdf<-merge(
+   mvotesdf,
+   classdf[,c("congress_rollnumber","punitive")],
+   by='congress_rollnumber',
+   all=T
+ )

> #we now need to indicate whether our punitive vote
> #is hand-coded, or a guess based on dw-nominate scores
> mvotesdf$punitive<-mvotesdf$handcoded<-NA

> tmp<-!is.na(mvotesdf$punitive.x)

> mvotesdf$punitive[tmp]<-mvotesdf$punitive.x[tmp]

> mvotesdf$handcoded[tmp]<-T

> tmp<-!is.na(mvotesdf$punitive.y)

> mvotesdf$punitive[tmp]<-mvotesdf$punitive.y[tmp]

> mvotesdf$handcoded[tmp]<-F

> table(mvotesdf$punitive,useNA='a')

     0      1   <NA> 
109257 323581      0 

> table(mvotesdf$handcoded)

 FALSE   TRUE 
412899  19939 

> mvotesdf$punitive.x<-mvotesdf$punitive.y<-NULL

> #########################################################
> #########################################################
> 
> #BASIC VARS OF INTEREST
> 
> #get the party
> mvotesdf$party<-"Other"

> tmp<-mvotesdf$party_code==100

> mvotesdf$party[tmp]<-"Democrats"

> table(mvotesdf$party_code)

   100    200    328    329    370    402    522 
228230 203988    575     23      1      9     12 

> tmp<-mvotesdf$party_code==200

> mvotesdf$party[tmp]<-"Republicans"

> #get the year
> mvotesdf$year<-year(mvotesdf$date)

> #get groups
> mvotesdf$group<-NA

> tmp<-mvotesdf$black==0 & 
+   mvotesdf$party=="Republicans"

> mvotesdf$group[tmp]<-"Republicans"

> tmp<-mvotesdf$black==0 & 
+   mvotesdf$party=="Democrats"

> mvotesdf$group[tmp]<-"Democrats"

> tmp<-mvotesdf$black==1

> mvotesdf$group[tmp]<-"Black"

> table(mvotesdf$group)

      Black   Democrats Republicans 
      32234      196583      203401 

> #classify votes
> mvotesdf$punitive_vote<-0

> tmp<-(mvotesdf$punitive==1 &
+   mvotesdf$vote==1) |
+   (mvotesdf$punitive==0 & 
+      mvotesdf$vote==0)

> mvotesdf$punitive_vote[tmp]<-1

> #get region
> mvotesdf$state_abbrev
   [1] "FL"  "MA"  "TX"  "MI"  "MA"  "MI"  "FL"  "WI"  "OH"  "IL"  "KY"  "FL"  "IL"  "NJ" 
  [15] "IL"  "IA"  "NY"  "MS"  "IL"  "TX"  "RI"  "AZ"  "CA"  "CA"  "FL"  "CA"  "NY"  "PA" 
  [29] "TX"  "TN"  "CA"  "IL"  "MI"  "TX"  "AL"  "TN"  "WA"  "MI"  "IN"  "NJ"  "IN"  "NY" 
  [43] "NC"  "MI"  "AL"  "AR"  "OH"  "OH"  "MS"  "IN"  "AL"  "OH"  "PA"  "AR"  "CA"  "NY" 
  [57] "FL"  "MO"  "PA"  "VA"  "NY"  "NM"  "OH"  "PA"  "WI"  "TN"  "IL"  "NJ"  "TX"  "WI" 
  [71] "CA"  "MN"  "NY"  "NY"  "KY"  "NY"  "SC"  "FL"  "NY"  "FL"  "MS"  "IL"  "MA"  "CA" 
  [85] "UT"  "VA"  "OH"  "NJ"  "NC"  "CO"  "PA"  "CA"  "MA"  "MO"  "TX"  "AK"  "LA"  "IL" 
  [99] "OH"  "CA"  "PA"  "MI"  "OR"  "WA"  "MI"  "SC"  "NY"  "MA"  "OK"  "NJ"  "TN"  "OH" 
 [113] "NC"  "KY"  "NJ"  "IL"  "VT"  "NY"  "TN"  "NY"  "CA"  "CA"  "NC"  "NY"  "MN"  "IL" 
 [127] "PA"  "IN"  "NE"  "NY"  "CA"  "PA"  "HI"  "OH"  "CA"  "GA"  "CA"  "MI"  "MO"  "WA" 
 [141] "CA"  "OK"  "AL"  "MO"  "KS"  "LA"  "FL"  "GA"  "MI"  "PA"  "IA"  "MA"  "MT"  "PA" 
 [155] "OH"  "CA"  "OH"  "MI"  "WV"  "MO"  "AZ"  "MN"  "MO"  "PA"  "PA"  "OK"  "NY"  "MN" 
 [169] "LA"  "NY"  "NY"  "AR"  "NE"  "TN"  "MD"  "WY"  "PA"  "CA"  "NJ"  "CA"  "MI"  "CA" 
 [183] "MA"  "CA"  "TX"  "GA"  "PA"  "NJ"  "OH"  "KY"  "FL"  "TX"  "TX"  "CA"  "WA"  "CA" 
 [197] "FL"  "CA"  "MA"  "FL"  "CA"  "PA"  "WI"  "MN"  "WI"  "CA"  "ME"  "NY"  "TX"  "WA" 
 [211] "OK"  "IA"  "CA"  "KS"  "MT"  "MI"  "WI"  "IL"  "LA"  "MI"  "VA"  "CO"  "IN"  "PA" 
 [225] "ID"  "NE"  "ND"  "CA"  "NJ"  "CA"  "MD"  "OH"  "MO"  "TX"  "PA"  "MA"  "CT"  "NH" 
 [239] "WI"  "TX"  "UT"  "GA"  "MI"  "IN"  "CA"  "CA"  "CA"  "NY"  "IL"  "FL"  "OK"  "OH" 
 [253] "NY"  "NY"  "WA"  "KS"  "LA"  "KY"  "NJ"  "IL"  "RI"  "NY"  "FL"  "NM"  "NJ"  "OR" 
 [267] "MN"  "VA"  "NY"  "OR"  "MD"  "MS"  "OH"  "CT"  "CA"  "NY"  "TX"  "TX"  "VA"  "CA" 
 [281] "CA"  "FL"  "NY"  "PA"  "CA"  "VA"  "CA"  "TX"  "IN"  "DE"  "WA"  "NC"  "TX"  "TN" 
 [295] "OH"  "IL"  "AL"  "IL"  "OH"  "PA"  "CT"  "OH"  "OH"  "PA"  "CA"  "MI"  "CA"  "FL" 
 [309] "IL"  "FL"  "FL"  "CA"  "IN"  "WI"  "CT"  "NY"  "UT"  "VA"  "TX"  "NY"  "CA"  "MN" 
 [323] "GA"  "NM"  "PA"  "GA"  "CO"  "MN"  "VA"  "KS"  "FL"  "OR"  "SC"  "WV"  "TN"  "SC" 
 [337] "GA"  "CA"  "NJ"  "NY"  "NC"  "NV"  "MO"  "WV"  "IL"  "GA"  "WI"  "WV"  "MA"  "TX" 
 [351] "MD"  "TX"  "IL"  "TX"  "AL"  "NC"  "TX"  "TX"  "NY"  "IL"  "NJ"  "TN"  "IL"  "MI" 
 [365] "PA"  "AZ"  "IA"  "NY"  "NC"  "KS"  "WA"  "NJ"  "MI"  "VA"  "NH"  "ID"  "TX"  "GA" 
 [379] "OH"  "AR"  "CT"  "IN"  "TX"  "TX"  "SC"  "KY"  "LA"  "NC"  "NV"  "ME"  "MO"  "KY" 
 [393] "CO"  "MD"  "IL"  "OR"  "MS"  "NY"  "CA"  "IA"  "AL"  "IL"  "LA"  "CO"  "CA"  "NY" 
 [407] "LA"  "NY"  "OK"  "SD"  "IN"  "MA"  "CA"  "AZ"  "NC"  "GA"  "MD"  "MD"  "MD"  "IA" 
 [421] "VA"  "NC"  "SC"  "AZ"  "HI"  "OH"  "CO"  "NY"  "TX"  "MI"  "PA"  "FL"  "USA" "FL" 
 [435] "MA"  "TX"  "MI"  "MA"  "MI"  "FL"  "WI"  "OH"  "IL"  "KY"  "FL"  "IL"  "NJ"  "IL" 
 [449] "IA"  "NY"  "MS"  "IL"  "TX"  "RI"  "AZ"  "CA"  "CA"  "FL"  "CA"  "NY"  "PA"  "TX" 
 [463] "TN"  "CA"  "IL"  "MI"  "TX"  "AL"  "TN"  "WA"  "MI"  "IN"  "NJ"  "IN"  "NY"  "NC" 
 [477] "MI"  "AL"  "AR"  "OH"  "OH"  "MS"  "IN"  "AL"  "OH"  "PA"  "AR"  "CA"  "NY"  "FL" 
 [491] "MO"  "PA"  "VA"  "NY"  "NM"  "OH"  "PA"  "WI"  "TN"  "IL"  "NJ"  "TX"  "WI"  "CA" 
 [505] "MN"  "NY"  "NY"  "KY"  "NY"  "SC"  "FL"  "NY"  "FL"  "MS"  "IL"  "MA"  "CA"  "UT" 
 [519] "VA"  "OH"  "NJ"  "NC"  "CO"  "PA"  "CA"  "MA"  "MO"  "TX"  "AK"  "LA"  "IL"  "OH" 
 [533] "CA"  "PA"  "MI"  "OR"  "WA"  "MI"  "SC"  "NY"  "MA"  "OK"  "NJ"  "TN"  "OH"  "NC" 
 [547] "KY"  "NJ"  "IL"  "VT"  "NY"  "TN"  "NY"  "CA"  "CA"  "NC"  "NY"  "MN"  "IL"  "PA" 
 [561] "IN"  "NE"  "NY"  "CA"  "PA"  "HI"  "OH"  "CA"  "GA"  "CA"  "MI"  "MO"  "WA"  "CA" 
 [575] "OK"  "AL"  "MO"  "KS"  "LA"  "FL"  "GA"  "MI"  "PA"  "IA"  "MA"  "MT"  "PA"  "OH" 
 [589] "CA"  "OH"  "MI"  "WV"  "MO"  "AZ"  "MN"  "MO"  "PA"  "PA"  "OK"  "NY"  "MN"  "LA" 
 [603] "NY"  "NY"  "AR"  "NE"  "TN"  "MD"  "WY"  "PA"  "CA"  "NJ"  "CA"  "MI"  "CA"  "MA" 
 [617] "CA"  "TX"  "GA"  "PA"  "NJ"  "OH"  "KY"  "FL"  "TX"  "TX"  "CA"  "WA"  "CA"  "FL" 
 [631] "CA"  "MA"  "FL"  "CA"  "PA"  "WI"  "MN"  "WI"  "CA"  "ME"  "NY"  "TX"  "WA"  "OK" 
 [645] "IA"  "CA"  "KS"  "MT"  "MI"  "WI"  "IL"  "LA"  "MI"  "VA"  "CO"  "IN"  "PA"  "ID" 
 [659] "NE"  "ND"  "CA"  "NJ"  "CA"  "MD"  "OH"  "MO"  "TX"  "PA"  "MA"  "CT"  "NH"  "WI" 
 [673] "TX"  "UT"  "GA"  "MI"  "IN"  "CA"  "CA"  "CA"  "NY"  "IL"  "FL"  "OK"  "OH"  "NY" 
 [687] "NY"  "WA"  "KS"  "LA"  "KY"  "NJ"  "IL"  "RI"  "NY"  "FL"  "NM"  "NJ"  "OR"  "MN" 
 [701] "VA"  "NY"  "OR"  "MD"  "MS"  "OH"  "CT"  "CA"  "NY"  "TX"  "TX"  "VA"  "CA"  "CA" 
 [715] "FL"  "NY"  "PA"  "CA"  "VA"  "CA"  "TX"  "IN"  "DE"  "WA"  "NC"  "TX"  "TN"  "OH" 
 [729] "IL"  "AL"  "IL"  "OH"  "PA"  "CT"  "OH"  "OH"  "PA"  "CA"  "MI"  "CA"  "FL"  "IL" 
 [743] "FL"  "FL"  "CA"  "IN"  "WI"  "CT"  "NY"  "UT"  "VA"  "TX"  "NY"  "CA"  "MN"  "GA" 
 [757] "NM"  "PA"  "GA"  "CO"  "MN"  "VA"  "KS"  "FL"  "OR"  "SC"  "WV"  "TN"  "SC"  "GA" 
 [771] "CA"  "NJ"  "NY"  "NC"  "NV"  "MO"  "WV"  "IL"  "GA"  "WI"  "WV"  "MA"  "TX"  "MD" 
 [785] "TX"  "IL"  "TX"  "AL"  "NC"  "TX"  "TX"  "NY"  "IL"  "NJ"  "TN"  "IL"  "MI"  "PA" 
 [799] "AZ"  "IA"  "NY"  "NC"  "KS"  "WA"  "NJ"  "MI"  "VA"  "NH"  "ID"  "TX"  "GA"  "OH" 
 [813] "AR"  "CT"  "IN"  "TX"  "TX"  "SC"  "KY"  "LA"  "NC"  "NV"  "ME"  "MO"  "KY"  "CO" 
 [827] "MD"  "IL"  "OR"  "MS"  "NY"  "CA"  "IA"  "AL"  "IL"  "LA"  "CO"  "CA"  "NY"  "LA" 
 [841] "NY"  "OK"  "SD"  "IN"  "MA"  "CA"  "AZ"  "NC"  "GA"  "MD"  "MD"  "MD"  "IA"  "VA" 
 [855] "NC"  "SC"  "AZ"  "HI"  "OH"  "CO"  "NY"  "TX"  "MI"  "PA"  "CA"  "FL"  "USA" "FL" 
 [869] "MA"  "TX"  "MI"  "MA"  "MI"  "FL"  "WI"  "OH"  "IL"  "KY"  "FL"  "IL"  "NJ"  "IL" 
 [883] "IA"  "NY"  "MS"  "IL"  "TX"  "RI"  "AZ"  "CA"  "CA"  "FL"  "CA"  "NY"  "PA"  "TX" 
 [897] "TN"  "CA"  "IL"  "MI"  "TX"  "AL"  "TN"  "WA"  "MI"  "IN"  "NJ"  "IN"  "NY"  "NC" 
 [911] "MI"  "AL"  "AR"  "OH"  "OH"  "MS"  "IN"  "AL"  "OH"  "PA"  "AR"  "CA"  "NY"  "FL" 
 [925] "MO"  "PA"  "VA"  "NY"  "NM"  "OH"  "PA"  "WI"  "TN"  "IL"  "NJ"  "TX"  "WI"  "CA" 
 [939] "MN"  "NY"  "NY"  "KY"  "NY"  "SC"  "FL"  "NY"  "FL"  "MS"  "IL"  "MA"  "CA"  "UT" 
 [953] "VA"  "OH"  "NJ"  "NC"  "CO"  "PA"  "CA"  "MA"  "MO"  "TX"  "AK"  "LA"  "IL"  "OH" 
 [967] "CA"  "PA"  "MI"  "OR"  "WA"  "MI"  "SC"  "NY"  "MA"  "OK"  "NJ"  "TN"  "OH"  "NC" 
 [981] "KY"  "NJ"  "IL"  "VT"  "NY"  "TN"  "NY"  "CA"  "CA"  "NC"  "NY"  "MN"  "IL"  "PA" 
 [995] "IN"  "NE"  "NY"  "CA"  "PA"  "HI" 
 [ reached getOption("max.print") -- omitted 431838 entries ]

> tmpdf<-data.frame(
+   state_abbrev=state.abb,
+   state_region=state.region,
+   stringsAsFactors=F
+ )

> tmpdf$state_south <- as.numeric(tmpdf$state_region=='South')

> tmpdf$state_goldwater <- tmpdf$state_abbrev%in%c(
+   'AZ','LA','MS','AL','GA','SC'
+ )

> mvotesdf<-merge(
+   mvotesdf,
+   tmpdf,
+   by='state_abbrev',
+   all.x=T
+ )  

> #presidents (state_abbrev "USA") match no state in the lookup; we want to omit them anyway
> mvotesdf$bioname[is.na(mvotesdf$state_south)] %>% unique
 [1] "REAGAN, Ronald Wilson"             "BUSH, George Herbert Walker"      
 [3] "CLINTON, William Jefferson (Bill)" "BUSH, George Walker"              
 [5] "OBAMA, Barack"                     "ROOSEVELT, Franklin Delano"       
 [7] "TRUMAN, Harry S."                  "EISENHOWER, Dwight David"         
 [9] "KENNEDY, John Fitzgerald"          "JOHNSON, Lyndon Baines"           
[11] "NIXON, Richard Milhous"            "FORD, Gerald Rudolph, Jr."        
[13] "CARTER, James Earl, Jr."          

> mvotesdf<-mvotesdf[!is.na(mvotesdf$state_south),]

> #########################################################
> #########################################################
> 
> #save out
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> write.csv(
+   mvotesdf,
+   "02_voting_fulldf_classified.csv",
+   row.names=F
+ )
[1] "######"
[1] "Running:"
[1] "05_summarize.R"

> #########################################################
> #########################################################
> 
> #clear workspace
> rm(list=ls())

> #load packages
> require(stringr)

> require(plyr)

> require(dplyr)

> require(zoo)

> require(data.table)

> require(tidyr)

> require(rprojroot)

> #set dirs
> rootdir<-find_root(
+   criterion=has_file('_rapol.Rproj')
+ )

> codedir<-file.path(rootdir,"code")

> setwd(codedir); dir()
[1] "01_publicopinion" "02_voting"        "03_dind"          "dirs.R"          
[5] "runeverything.R" 

> source('dirs.R')

> #########################################################
> #########################################################
> 
> #plotting prelims
> require(ggplot2)

> require(ggthemes)

> require(extrafont)

> require(RColorBrewer)

> require(scales)

> # #load fonts
> # loadfonts(quiet=T) #register w/ pdf
> # loadfonts(device = "win",quiet=T) #register w/ windows
> # #fonts()
> # #get ghostscript, for tex output
> # gsdir<-file.path(
> #   "c:",
> #   "Program Files",
> #   "gs"
> # )
> # gsdir_full<-file.path(
> #   gsdir,
> #   dir(gsdir),
> #   "bin",
> #   "gswin64c.exe"
> # )
> # Sys.setenv(
> #   R_GSCMD = gsdir_full
> # )
> # #initialize graphlist
> # gs.list<-list()
> 
> #quick function to output dfs
> output <- function(df,tmpname) {
+   setwd(outputdir)
+   if( str_detect(tmpname,"\\.pdf$|\\.png$") ) 
+     tmpname<-str_replace(tmpname,"\\.pdf$|\\.png$",".csv")
+   write.csv(
+     df,
+     tmpname,
+     row.names=F
+   )
+ }

> #########################################################
> #########################################################
> 
> #load classified punitive votes in the house
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> mvotesdf<-fread(
+   '02_voting_fulldf_classified.csv'
+ )

> #how many
> unique(mvotesdf$congress_rollnumber[!mvotesdf$handcoded]) %>% length
[1] 952

> unique(mvotesdf$congress_rollnumber[mvotesdf$handcoded]) %>% length
[1] 46

> #952 new; 46 handcoded
> 
> #fix groups for display clarity
> tmp<-mvotesdf$group%in%c('Republicans','Democrats')

> mvotesdf$group[tmp]<-paste0('Non-Black ',mvotesdf$group[tmp])

> #########################################################
> #########################################################
> 
> #FIG X - SUMMARIZE BY PRESIDENT
> 
> #merge info about presidents into this, for display
> require(rvest)
Loading required package: rvest
 

Attaching package: ‘rvest’

 
The following object is masked from ‘package:readr’:

    guess_encoding

 

> require(lubridate)
Loading required package: lubridate
 

Attaching package: ‘lubridate’

 
The following objects are masked from ‘package:data.table’:

    hour, isoweek, mday, minute, month, quarter, second, wday, week, yday, year

 
The following objects are masked from ‘package:base’:

    date, intersect, setdiff, union

 

> # tmpurl <- "https://history.house.gov/Institution/Presidents-Coinciding/Presidents-Coinciding/"
> # tmpdf<-thishtml %>%
> #   html_nodes('.manual-table-not-sortable') %>%
> #   html_table()
> # tmpdf<-tmpdf[[1]]
> # setwd(datadir); fwrite(tmpdf,'prezdf.csv')
> setwd(datadir); tmpdf<-fread('prezdf.csv')

> names(tmpdf)<-tolower(names(tmpdf))

> names(tmpdf)<-c('number','president','vp','years','congresses')

> tmpdf$number<-na.locf(tmpdf$number)

> tmpdf$startdate<-str_extract(
+   tmpdf$years,
+   '.*?\\–'
+ ) %>% str_replace("\\–","")

> tmpdf$enddate<-str_extract(
+   tmpdf$years,
+   '\\–.*$'
+ ) %>% str_replace("\\–","")

> tmpdf$startdate<-mdy(tmpdf$startdate)

> tmp<-tmpdf$enddate=='present'

> tmpdf$enddate[tmp]<-paste0(month(today()),'-',day(today()),'-',year(today()))

> tmpdf$enddate<-mdy(tmpdf$enddate)

> #now, generate full dataset
> tmpdf<-lapply( 1:nrow(tmpdf),function(i) {
+   #i<-60
+   #print(i)
+   thisrow<-tmpdf[i,]
+   data.frame(
+     president=str_replace_all(
+       thisrow$president,"[0-9]+","" #b/c footnotes, etc
+     ) %>% str_trim(), 
+     date=seq(
+       thisrow$startdate,
+       thisrow$enddate,
+       by='days'
+     )
+   )
+ }) %>% rbind.fill %>% data.table

> #keep only those that are in mvotesdf
> mvotesdf$date<-lubridate::ymd(mvotesdf$date)

> mvotesdf<-merge(
+   mvotesdf,
+   tmpdf,
+   by='date',
+   all.x=T
+ )

> #summarize by president
> plotdf<-mvotesdf[
+   !is.na(punitive) &
+     !is.na(group) &
+     group=='Black'
+   ,
+   .(
+     punitive_pct=100 * mean(punitive_vote,na.rm=T),
+     punitive_votes = length(unique(congress_rollnumber)),
+     start_date = min(date)
+   )
+   ,
+   by=c(
+     "group",
+     "president"
+   )
+ ]

> setorder(plotdf,group,president)

> plotdf$lastname<-str_extract(plotdf$president,'[A-z]+$')

> tmp<-plotdf$lastname=='Bush'

> plotdf$lastname[tmp]<-c('Bush I','Bush II')

> tmplevels<-plotdf$lastname[order(plotdf$start_date)]

> plotdf$lastname<-factor(plotdf$lastname,tmplevels %>% rev)

> plotdf$fill<-plotdf$lastname%in%c('Nixon','Reagan')

> tmpfills<-c('red','darkgrey'); names(tmpfills)<-c(T,F)

> g.tmp<- ggplot(
+   plotdf,
+   aes(
+     x=lastname,
+     y=punitive_pct,
+     fill=fill
+   )
+ ) + 
+   geom_bar(
+     stat='identity',
+     width=0.5,
+     color='black'
+   ) +
+   scale_fill_manual(
+     guide='none',
+     values=tmpfills
+   ) +
+   theme_bw() +
+   coord_flip() +
+   ylab("") +
+   xlab("% of Black Members Voting Punitively\n")

> setwd(outputdir)

> tmpname<-"fig_voting_presidents.png"

> ggsave(
+   plot=g.tmp,
+   filename=tmpname,
+   width=6,
+   height=10
+ )

> output(plotdf,tmpname)

> #########################################################
> #########################################################
> 
> #SUMMARIZE
> 
> #because before 1960, sample is sparse
> #and this is irrelevant for 'standard story' evaluation
> #set everything before 1960 to 1960
> tmp<-mvotesdf$year<=1960

> mvotesdf$year[tmp]<-1960

> #summarize by group and year (the loess is fit to these summaries below)
> sumdf<-mvotesdf[
+   !is.na(punitive) &
+     !is.na(group)
+   ,
+   .(
+     punitive_pct = 100 * mean(punitive_vote,na.rm=T),
+     punitive_votes = length(unique(congress_rollnumber))
+   )
+   ,
+   by=c(
+     "group",
+     "year"
+   )
+ ]

> setorder(sumdf,group,year)

> # #just the handcoded ones
> # sumdf2<-mvotesdf[
> #   !is.na(punitive) & 
> #     !is.na(group) &
> #     handcoded==T
> #   ,
> #   .(
> #     punitive_pct = 100 * mean(punitive_vote,na.rm=T)
> #   )
> #   ,
> #   by=c(
> #     'group',
> #     'year'
> #   )
> # ]
> 
> sumdf<-by(sumdf,sumdf$group,function(df) {
+   #df<-sumdf[sumdf$group=="Democrats",]
+   tmp<-loess(
+     data=df,
+     punitive_pct ~ year
+   ) %>% predict(df$year,se=T)
+   df$mu.loess<-tmp$fit
+   df$se.loess<-tmp$se.fit
+   df
+ }) %>% rbind.fill %>% data.table

> #generate diffdf, 
> #which is difference
> #between cbc and dems
> #and cbc and repubs
> diffdf<-by(sumdf,sumdf$year,function(df) {
+   #df<-sumdf[sumdf$year==1947,]
+   cbc<-rnorm(
+     1000,
+     mean=df$mu.loess[df$group=='Black'],
+     sd=df$se.loess[df$group=='Black']
+   )
+   dems<-rnorm(
+     1000,
+     mean=df$mu.loess[df$group=='Non-Black Democrats'],
+     sd=df$se.loess[df$group=='Non-Black Democrats']
+   )
+   repubs<-rnorm(
+     1000,
+     mean=df$mu.loess[df$group=='Non-Black Republicans'],
+     sd=df$se.loess[df$group=='Non-Black Republicans']
+   )
+   tmpdf<-rbind(
+     data.frame(quantile(dems - cbc,c(0.025,0.5,0.975)) %>% t),
+     data.frame(quantile(repubs - cbc,c(0.025,0.5,0.975)) %>% t)
+   )
+   names(tmpdf)<-c("mu.min","mu","mu.max")
+   tmpdf$group<-c("dems","repubs")
+   tmpdf$year<-unique(df$year)
+   tmpdf
+ }) %>% rbind.fill

> #########################################################
> #########################################################
> 
> #FIG 5 - SUMMARY OF LEVELS
> 
> # #load
> # setwd(codedir); source('genconventional.R')
> 
> plotdf<-sumdf

> #plotdf$facet<-"estimated"
> plotdf$yhat<-plotdf$mu.loess

> # #add conventional view
> # loopdf<-data.frame(
> #   sumcat=c(
> #     'Black',
> #     'Non-Black Democrats',
> #     'Non-Black Republicans'
> #   )
> # )
> # 
> # # c<-30
> # # loopdf$startpoint<-c(
> # #   50-c, 
> # #   50,
> # #   50+0.9*c
> # # )
> # # loopdf$endpoint<-c(
> # #   50-c,
> # #   50+c,
> # #   50+c
> # # )
> 
> # tmpseq.i<-1:nrow(loopdf)
> # tmpdf<-lapply(tmpseq.i,function(i) {
> #   #i<-2
> #   print(i)
> #   endyr<-max(plotdf$year)
> #   styr<-min(plotdf$year)
> #   thisrow<-loopdf[i,]
> #   # m<-(thisrow$endpoint-thisrow$startpoint)/(endyr-styr)
> #   # b<-thisrow$startpoint - (styr * m)
> #   # fun.y<-function(x) {
> #   #   m * x + b
> #   # }
> #   # yhat<-sapply(
> #   #   styr:endyr,
> #   #   fun.y
> #   # )
> #   
> #   if(thisrow=='Black') {
> #     yhat<-20
> #   } else if(thisrow=='Non-Black Democrats') {
> #     yhat<-genconventional(40,70,80)[-(1),2]
> #   } else if(thisrow=='Non-Black Republicans') {
> #     yhat<-genconventional(55,80,80)[-(1),2]
> #   }
> #   data.frame(
> #     group=thisrow,
> #     year=styr:endyr,
> #     mu.loess=yhat,
> #     stringsAsFactors=F
> #   )
> # }) %>% rbind.fill
> # tmpdf$facet<-"conventional"
> 
> 
> # plotdf<-rbind.fill(
> #   plotdf,
> #   tmpdf
> # )
> 
> # tmplevels<-c(
> #   "conventional",
> #   "estimated"
> # )
> # tmplabels<-c(
> #   "Conventional View",
> #   "Estimated"
> # )
> # plotdf$facet<-factor(
> #   plotdf$facet,
> #   tmplevels,
> #   tmplabels
> # )
> 
> tmplevels<-c(
+   "Non-Black Democrats",
+   "Non-Black Republicans",
+   "Black"
+ )

> plotdf$group<-factor(
+   plotdf$group,
+   tmplevels
+ )

> tmpcolors<-c(
+   "Blue",
+   "Red",
+   "Black"
+ )

> names(tmpcolors)<-levels(plotdf$group)

> g.tmp<-ggplot(
+   plotdf,
+   aes(
+     x=year,
+     y=mu.loess,
+     group=group,
+     color=group
+   )
+ ) +
+   geom_line(linewidth=1) +
+   scale_color_manual(
+     name="",
+     values=tmpcolors
+   ) +
+   xlab("") +
+   ylab("% Voting Punitive\n") +
+   theme_bw() +
+   theme(
+     legend.position = 'bottom',
+     legend.direction = 'horizontal'
+   )

> setwd(outputdir)

> tmpname<-"fig_voting_levels.png"

> ggsave(
+   plot=g.tmp,
+   filename=tmpname,
+   width=5,
+   height=5,
+   dpi=300
+ )

> output(plotdf,tmpname)

> ggsave(
+   plot=g.tmp,
+   filename='CleggFig5.pdf',
+   width=5,
+   height=5,
+   dpi=300
+ )

> #########################################################
> #########################################################
> 
> #FIG X - PLOT THE DIFFERENCES
> 
> plotdf<-diffdf

> tmplevels<-c(
+   "dems",
+   "repubs"
+ )

> tmplabels<-c(
+   "Non-Black Democrats",
+   "Non-Black Republicans"
+ )

> plotdf$group<-factor(
+   plotdf$group,
+   tmplevels,
+   tmplabels
+ )

> tmpcolors2<-c(
+   "Blue",
+   "Red"
+ )

> names(tmpcolors2)<-levels(plotdf$group)

> g.tmp<-ggplot(
+   plotdf,
+   aes(
+     x=year,
+     y=mu,
+     ymin=mu.min,
+     ymax=mu.max,
+     color=group
+   )
+ ) +
+   geom_line(
+     size=1
+   ) +
+   geom_hline(
+     yintercept=0,
+     linetype='dashed'
+   ) +
+   geom_ribbon(
+     alpha=0.25,
+     color='grey'
+   ) +
+   scale_color_manual(
+     name="",
+     values=tmpcolors2,
+     guide='none'
+   ) +
+   facet_wrap(
+     ~ group,
+     ncol=1
+   ) +
+   theme_bw() +
+   xlab("") +
+   ylab("Punitiveness Gap to Black Members\n")

> setwd(outputdir)

> tmpname<-"fig_voting_differences.png"

> ggsave(
+   plot=g.tmp,
+   filename=tmpname,
+   width=5,
+   height=8
+ )

> output(plotdf,tmpname)

> #########################################################
> #########################################################
> 
> #FIG 6 - SOUTH VS. NON-SOUTH 
> 
> #split the sample by 1964 election returns
> #goldwater states, all other states, very liberal states
> require(rvest)

> #scraped on 6/17/2025
> # tmpurl<-'https://en.wikipedia.org/wiki/1964_United_States_presidential_election'
> # myhtml <- read_html(tmpurl) %>% html_nodes('table')
> # tmpdf <- myhtml[[19]] %>% html_table
> # setwd(datadir); fwrite(tmpdf,'lbj1964.csv')
> setwd(datadir); tmpdf <- fread('lbj1964.csv')

> tmpdf <- tmpdf[-(1),c(1,15)] #this is the margin in favor of LBJ

> names(tmpdf)<-c('statename','lbjmargin')

> tmpdf$lbjmargin<-str_replace_all(tmpdf$lbjmargin,"\\,","")

> tmpnegative<-str_detect(tmpdf$lbjmargin,"\\−")

> tmpdf$lbjmargin<-str_replace(tmpdf$lbjmargin,"\\−","")

> tmpdf$lbjmargin<-as.numeric(tmpdf$lbjmargin)
NAs introduced by coercion 

> tmpdf$lbjmargin[tmpnegative]<- -1 * tmpdf$lbjmargin[tmpnegative]

> sdf<-data.frame(
+   statename=state.name,
+   state_abbrev=state.abb
+ )

> tmp<-tmpdf$statename%in%sdf$statename

> tmpdf$statename[!tmp] #just DC missing
[1] "State"   "D. C."   "TOTALS:"

> tmpdf<-merge(
+   tmpdf,
+   sdf
+ )

> tmpdf$state_1964 <- NA

> #5 goldwater states, 5 most liberal states, and the remainder
> tmpdf<-tmpdf[order(tmpdf$lbjmargin),]

> nrow(tmpdf)
[1] 50

> tmpdf$state_1964[1:5]<-'Goldwater'

> tmpdf$state_1964[6:45]<-'LBJ (Rest)'

> tmpdf$state_1964[46:50]<-'LBJ (Top 5)'

> tmpdf$state_1964 <- factor(tmpdf$state_1964,c('Goldwater','LBJ (Rest)','LBJ (Top 5)'))

> mvotesdf <- merge(
+   mvotesdf,
+   tmpdf[,c('state_abbrev','state_1964')],
+   all.x=T,
+   by=c('state_abbrev')
+ )

> sumdf<-mvotesdf[
+   !is.na(punitive) &
+     !is.na(group) & 
+     group!='Black'
+   ,
+   .(
+     punitive_pct = 100 * mean(punitive_vote,na.rm=T),
+     punitive_votes = length(unique(congress_rollnumber))
+   )
+   ,
+   by=c(
+     #"group",
+     'state_1964',
+     "year"
+   )
+ ]

> setorder(sumdf,year)#group,year)

> sumdf<-by(sumdf,sumdf$state_1964,function(df) { #list(sumdf$group,sumdf$state_goldwater),function(df) {
+   #df<-sumdf[sumdf$group=="Democrats",]
+   tmp<-loess(
+     data=df,
+     punitive_pct ~ year
+   ) %>% predict(df$year,se=T)
+   df$mu.loess<-tmp$fit
+   df$se.loess<-tmp$se.fit
+   df
+ }) %>% rbind.fill %>% data.table

> plotdf<-sumdf

> plotdf$facet<-"estimated"

> plotdf$yhat<-plotdf$mu.loess

> plotdf <- plotdf[plotdf$state_1964%in%c('Goldwater','LBJ (Top 5)')]

> g.tmp <- ggplot(
+   plotdf,
+   aes(
+     x=year,
+     y=yhat,
+     #ymin=mu.loess - 1.96*se.loess,
+     #ymax=mu.loess + 1.96*se.loess,
+     group=state_1964,
+     linetype=state_1964#,
+     #color=group
+   )
+ ) +
+   geom_line(size=1) +
+   scale_color_manual(
+     name="",
+     values=tmpcolors
+   ) +
+   scale_linetype_discrete(
+     name=""
+   ) +
+   #facet_wrap( ~ group) +
+   xlab("") +
+   ylab("% Voting Punitive\n") +
+   # facet_wrap(
+   #   ~ facet,
+   #   ncol=1
+   # ) +
+   theme_bw() +
+   theme(
+     legend.position = 'bottom',
+     legend.direction = 'horizontal',
+     panel.spacing.x = unit(2,'lines')
+   )

> setwd(outputdir)

> tmpname<-"fig_voting_levels_south.pdf"

> ggsave(
+   plot=g.tmp,
+   filename=tmpname,
+   width=4*1.25,
+   height=4*1.25,
+   dpi=300
+ )
No shared levels found between `names(values)` of the manual scale and the data's colour
values. 

> output(plotdf,tmpname)

> ggsave(
+   plot=g.tmp,
+   filename='CleggFig6.pdf',
+   width=4*1.25,
+   height=4*1.25,
+   dpi=300
+ )
No shared levels found between `names(values)` of the manual scale and the data's colour
values. 

> #for calculations
> tmpdf<-spread(
+   plotdf[,c('year','state_1964','mu.loess')],
+   state_1964,
+   mu.loess
+ )

> tmpdf$diff <- tmpdf$Goldwater - tmpdf$`LBJ (Top 5)`

> tmpdf$diff
 [1]  5.744400  5.570078  5.421467  5.296930  5.051415  5.006823  4.978128  4.963695  4.948718
[10]  4.927137  4.910437  4.910103  4.937620  5.004471  5.122143  5.316377  5.594720  5.941355
[19]  6.340466  6.776238  7.232855  7.694502  8.127563  8.524171  8.907135  9.299265  9.723370
[28] 10.202258 10.758739 11.509179 12.490152 13.604780 14.756184 15.847485 16.781804 17.462262
[37] 17.923936 18.282855 18.563575 18.790649 18.988633 19.182080 19.395544 19.604832 19.772117
[46] 19.901508 19.997112 20.063036 20.103388 20.122276 20.120912 20.096921 20.049027 19.975954
[55] 19.876425 19.749167 19.592901

> goldwater_increase <- 
+   (tmpdf$Goldwater[tmpdf$year==2018] - tmpdf$Goldwater[tmpdf$year==1960])/
+   tmpdf$Goldwater[tmpdf$year==1960] #105% increase

> lbj_increase <- 
+   (tmpdf$`LBJ (Top 5)`[tmpdf$year==2018] - tmpdf$`LBJ (Top 5)`[tmpdf$year==1960])/
+   tmpdf$`LBJ (Top 5)`[tmpdf$year==1960] #84% increase

> lbj_increase/goldwater_increase #80% of the goldwater increase happens in the lbj case, too
[1] 0.8032518

> ## run d-in-d scripts (Section 5)
> setwd(dcodedir); dir()
 [1] "01_dindmods.R"           "02_regmods.R"            "03_regmods_robustness.R"
 [4] "04_summarize.R"          "beofunctions.R"          "beofunctions2.R"        
 [7] "checkunitroots.R"        "dirs.R"                  "getestimates.R"         
[10] "getlongrun2.R"           "XX_figures.R"            "XX_footnotes.R"         
[13] "XX_intmods.R"            "XX_runall.R"             "XX_tables.R"            
[16] "XX_unitroots.R"          "XX_unitroots_output.R"   "XX_unitroots2.R"        
[19] "XX_writeplots.R"        

> myfiles <- dir()[str_detect(dir(), '^[0-9]{2}')]

> for (myfile in myfiles) {
+   print("######")
+   print("Running:")
+   print(myfile)
+ 
+   pcodedir <- file.path(
+     find_root(criterion = has_ .... [TRUNCATED] 
[1] "######"
[1] "Running:"
[1] "01_dindmods.R"

> #########################################################
> #########################################################
> 
> #clear workspace
> rm(list=ls())

> #load packages
> require(stringr)

> require(plyr)

> require(dplyr)

> require(zoo)

> require(data.table)

> require(tidyr)

> require(rprojroot)

> #extra
> require(lmtest)

> require(boot)

> require(plm)

> require(MASS)

> #set dirs
> rootdir<-find_root(
+   criterion=has_file('_rapol.Rproj')
+ )

> codedir<-file.path(rootdir,"code")

> setwd(codedir); dir()
[1] "01_publicopinion" "02_voting"        "03_dind"          "dirs.R"          
[5] "runeverything.R" 

> source('dirs.R')

> #load helper functions
> setwd(dcodedir)

> source('beofunctions.R')

> source('beofunctions2.R')

> #set seed
> set.seed(23)

> reps<-1000

> boot.reps<-10

> #########################################################
> #########################################################
> 
> #PRELIMINARIES
> 
> #LOAD BEODF
> setwd(datadir); dir()
 [1] "abcdfs.csv"                                 "All_Bills(1947-2017).csv"                  
 [3] "american_violence_data_20241016_201046.csv" "anesdf.csv"                                
 [5] "beodf.csv"                                  "beodf5.csv"                                
 [7] "billsdf_revised.csv"                        "cbcdf.csv"                                 
 [9] "cbsdfs.csv"                                 "citecount.csv"                             
[11] "congressmen_list.csv"                       "conventional.bib"                          
[13] "cpopdf.csv"                                 "Ethnic_Collect_Action.dta"                 
[15] "fulldf_bills.csv"                           "gallupdfs.csv"                             
[17] "gssdf.csv"                                  "Hall_votes.csv"                            
[19] "histpundf_national_220328.csv"              "housevotes_handcoded_revised.csv"          
[21] "HSall_members.csv"                          "HSall_rollcalls_withissues.csv"            
[23] "incrates_subnationalstate.csv"              "lbj1964.csv"                               
[25] "nbclatdfs.csv"                              "prezdf.csv"                                
[27] "questions_fortex.txt"                       "race_riot.dta"                             
[29] "redf.csv"                                   "roperdfs.csv"                              
[31] "tab_longviolence.csv"                       "timedfs.csv"                               
[33] "votes"                                     

> beodf<-read.csv(
+   'beodf.csv',
+   stringsAsFactors=F
+ )

> head(beodf)
  state_alpha2 statename region division year imprt_t_jur officers_pcap incrt_t_jur
1           AK    Alaska   West  Pacific 1960          NA            NA          NA
2           AK    Alaska   West  Pacific 1961          NA            NA          NA
3           AK    Alaska   West  Pacific 1962          NA            NA          NA
4           AK    Alaska   West  Pacific 1963          NA            NA          NA
  lncorrections_pcap lnpolicesp_pcap beopct_all pct_blackpop violent_crt all_allraces_loess
1           2.185434        3.263892         NA           NA       104.3          0.3851229
2           3.665997        3.499290         NA           NA        88.9          0.3917353
3           3.877588        3.615223         NA           NA        91.5          0.3986436
4           3.944476        3.694098         NA           NA       109.7          0.4058374
  punitive_allraces_loess mistrust_allraces_loess anxiety_allraces_loess all_black_loess
1               0.3624622                      NA                     NA       0.2747111
2               0.3666322                      NA                     NA       0.2779276
3               0.3713316                      NA                     NA       0.2816064
4               0.3765466                      NA                     NA       0.2857372
  punitive_black_loess mistrust_black_loess anxiety_black_loess demcontrol.klarner txtot_pc
1            0.2625864                   NA                  NA                  1       NA
2            0.2635252                   NA                  NA                  1       NA
3            0.2650784                   NA                  NA                  1       NA
4            0.2672335                   NA                  NA                  0       NA
  unemprate lnrealinc_pc      gini welfbenefits crack_index povertyrate  growthrate
1        NA     9.996937 0.4385357     258.0002          NA       19.00          NA
2        NA     9.944539 0.4394296     257.9575          NA       18.36 -0.05239839
3  6.117965     9.916330 0.4513472     257.9148          NA       17.72 -0.02820903
4 11.454868     9.932716 0.4146536     257.8722          NA       17.08  0.01638665
  L.imprt_t_jur L2.imprt_t_jur L3.imprt_t_jur L4.imprt_t_jur D.imprt_t_jur L.D.imprt_t_jur
1            NA             NA             NA             NA            NA              NA
2            NA             NA             NA             NA            NA              NA
3            NA             NA             NA             NA            NA              NA
4            NA             NA             NA             NA            NA              NA
  L2.D.imprt_t_jur L.officers_pcap L2.officers_pcap L3.officers_pcap L4.officers_pcap
1               NA              NA               NA               NA               NA
2               NA              NA               NA               NA               NA
3               NA              NA               NA               NA               NA
4               NA              NA               NA               NA               NA
  D.officers_pcap L.D.officers_pcap L2.D.officers_pcap L.incrt_t_jur L2.incrt_t_jur
1              NA                NA                 NA            NA             NA
2              NA                NA                 NA            NA             NA
3              NA                NA                 NA            NA             NA
4              NA                NA                 NA            NA             NA
  L3.incrt_t_jur L4.incrt_t_jur D.incrt_t_jur L.D.incrt_t_jur L2.D.incrt_t_jur
1             NA             NA            NA              NA               NA
2             NA             NA            NA              NA               NA
3             NA             NA            NA              NA               NA
4             NA             NA            NA              NA               NA
  L.lncorrections_pcap L2.lncorrections_pcap L3.lncorrections_pcap L4.lncorrections_pcap
1                   NA                    NA                    NA                    NA
2             2.185434                    NA                    NA                    NA
3             3.665997              2.185434                    NA                    NA
4             3.877588              3.665997              2.185434                    NA
  D.lncorrections_pcap L.D.lncorrections_pcap L2.D.lncorrections_pcap L.lnpolicesp_pcap
1                   NA                     NA                      NA                NA
2           1.48056301                     NA                      NA          3.263892
3           0.21159057              1.4805630                      NA          3.499290
4           0.06688816              0.2115906                1.480563          3.615223
  L2.lnpolicesp_pcap L3.lnpolicesp_pcap L4.lnpolicesp_pcap D.lnpolicesp_pcap
1                 NA                 NA                 NA                NA
2                 NA                 NA                 NA        0.23539862
3           3.263892                 NA                 NA        0.11593320
4           3.499290           3.263892                 NA        0.07887456
  L.D.lnpolicesp_pcap L2.D.lnpolicesp_pcap L.beopct_all L2.beopct_all L3.beopct_all
1                  NA                   NA           NA            NA            NA
2                  NA                   NA           NA            NA            NA
3           0.2353986                   NA           NA            NA            NA
4           0.1159332            0.2353986           NA            NA            NA
  L4.beopct_all D.beopct_all L.D.beopct_all L2.D.beopct_all L.pct_blackpop L2.pct_blackpop
1            NA           NA             NA              NA             NA              NA
2            NA           NA             NA              NA             NA              NA
3            NA           NA             NA              NA             NA              NA
4            NA           NA             NA              NA             NA              NA
  L3.pct_blackpop L4.pct_blackpop D.pct_blackpop L.D.pct_blackpop L2.D.pct_blackpop
1              NA              NA             NA               NA                NA
2              NA              NA             NA               NA                NA
3              NA              NA             NA               NA                NA
4              NA              NA             NA               NA                NA
  L.violent_crt L2.violent_crt L3.violent_crt L4.violent_crt D.violent_crt L.D.violent_crt
1            NA             NA             NA             NA            NA              NA
2         104.3             NA             NA             NA    -15.400002              NA
3          88.9          104.3             NA             NA      2.599998      -15.400002
4          91.5           88.9          104.3             NA     18.199997        2.599998
  L2.D.violent_crt L.all_allraces_loess L2.all_allraces_loess L3.all_allraces_loess
1               NA                   NA                    NA                    NA
2               NA            0.3851229                    NA                    NA
3               NA            0.3917353             0.3851229                    NA
4            -15.4            0.3986436             0.3917353             0.3851229
  L4.all_allraces_loess D.all_allraces_loess L.D.all_allraces_loess L2.D.all_allraces_loess
1                    NA                   NA                     NA                      NA
2                    NA          0.006612434                     NA                      NA
3                    NA          0.006908278            0.006612434                      NA
4                    NA          0.007193732            0.006908278             0.006612434
  L.punitive_allraces_loess L2.punitive_allraces_loess L3.punitive_allraces_loess
1                        NA                         NA                         NA
2                 0.3624622                         NA                         NA
3                 0.3666322                  0.3624622                         NA
4                 0.3713316                  0.3666322                  0.3624622
  L4.punitive_allraces_loess D.punitive_allraces_loess L.D.punitive_allraces_loess
1                         NA                        NA                          NA
2                         NA               0.004169969                          NA
3                         NA               0.004699458                 0.004169969
4                         NA               0.005215021                 0.004699458
  L2.D.punitive_allraces_loess L.mistrust_allraces_loess L2.mistrust_allraces_loess
1                           NA                        NA                         NA
2                           NA                        NA                         NA
3                           NA                        NA                         NA
4                  0.004169969                        NA                         NA
  L3.mistrust_allraces_loess L4.mistrust_allraces_loess D.mistrust_allraces_loess
1                         NA                         NA                        NA
2                         NA                         NA                        NA
3                         NA                         NA                        NA
4                         NA                         NA                        NA
  L.D.mistrust_allraces_loess L2.D.mistrust_allraces_loess L.anxiety_allraces_loess
1                          NA                           NA                       NA
2                          NA                           NA                       NA
3                          NA                           NA                       NA
4                          NA                           NA                       NA
  L2.anxiety_allraces_loess L3.anxiety_allraces_loess L4.anxiety_allraces_loess
1                        NA                        NA                        NA
2                        NA                        NA                        NA
3                        NA                        NA                        NA
4                        NA                        NA                        NA
  D.anxiety_allraces_loess L.D.anxiety_allraces_loess L2.D.anxiety_allraces_loess
1                       NA                         NA                          NA
2                       NA                         NA                          NA
3                       NA                         NA                          NA
4                       NA                         NA                          NA
  L.all_black_loess L2.all_black_loess L3.all_black_loess L4.all_black_loess
1                NA                 NA                 NA                 NA
2         0.2747111                 NA                 NA                 NA
3         0.2779276          0.2747111                 NA                 NA
4         0.2816064          0.2779276          0.2747111                 NA
  D.all_black_loess L.D.all_black_loess L2.D.all_black_loess L.punitive_black_loess
1                NA                  NA                   NA                     NA
2       0.003216492                  NA                   NA              0.2625864
3       0.003678839         0.003216492                   NA              0.2635252
4       0.004130762         0.003678839          0.003216492              0.2650784
  L2.punitive_black_loess L3.punitive_black_loess L4.punitive_black_loess
1                      NA                      NA                      NA
2                      NA                      NA                      NA
3               0.2625864                      NA                      NA
4               0.2635252               0.2625864                      NA
  D.punitive_black_loess L.D.punitive_black_loess L2.D.punitive_black_loess
1                     NA                       NA                        NA
2           0.0009388699                       NA                        NA
3           0.0015532011             0.0009388699                        NA
4           0.0021551201             0.0015532011              0.0009388699
  L.mistrust_black_loess L2.mistrust_black_loess L3.mistrust_black_loess
1                     NA                      NA                      NA
2                     NA                      NA                      NA
3                     NA                      NA                      NA
4                     NA                      NA                      NA
  L4.mistrust_black_loess D.mistrust_black_loess L.D.mistrust_black_loess
1                      NA                     NA                       NA
2                      NA                     NA                       NA
3                      NA                     NA                       NA
4                      NA                     NA                       NA
  L2.D.mistrust_black_loess L.anxiety_black_loess L2.anxiety_black_loess
1                        NA                    NA                     NA
2                        NA                    NA                     NA
3                        NA                    NA                     NA
4                        NA                    NA                     NA
  L3.anxiety_black_loess L4.anxiety_black_loess D.anxiety_black_loess L.D.anxiety_black_loess
1                     NA                     NA                    NA                      NA
2                     NA                     NA                    NA                      NA
3                     NA                     NA                    NA                      NA
4                     NA                     NA                    NA                      NA
  L2.D.anxiety_black_loess L.demcontrol.klarner L2.demcontrol.klarner L3.demcontrol.klarner
1                       NA                   NA                    NA                    NA
2                       NA                    1                    NA                    NA
3                       NA                    1                     1                    NA
4                       NA                    1                     1                     1
  L4.demcontrol.klarner D.demcontrol.klarner L.D.demcontrol.klarner L2.D.demcontrol.klarner
1                    NA                   NA                     NA                      NA
2                    NA                    0                     NA                      NA
3                    NA                    0                      0                      NA
4                    NA                   -1                      0                       0
  L.txtot_pc L2.txtot_pc L3.txtot_pc L4.txtot_pc D.txtot_pc L.D.txtot_pc L2.D.txtot_pc
1         NA          NA          NA          NA         NA           NA            NA
2         NA          NA          NA          NA         NA           NA            NA
3         NA          NA          NA          NA         NA           NA            NA
4         NA          NA          NA          NA         NA           NA            NA
  L.unemprate L2.unemprate L3.unemprate L4.unemprate D.unemprate L.D.unemprate L2.D.unemprate
1          NA           NA           NA           NA          NA            NA             NA
2          NA           NA           NA           NA          NA            NA             NA
3          NA           NA           NA           NA          NA            NA             NA
4    6.117965           NA           NA           NA    5.336903            NA             NA
  L.lnrealinc_pc L2.lnrealinc_pc L3.lnrealinc_pc L4.lnrealinc_pc D.lnrealinc_pc
1             NA              NA              NA              NA             NA
2       9.996937              NA              NA              NA    -0.05239839
3       9.944539        9.996937              NA              NA    -0.02820903
4       9.916330        9.944539        9.996937              NA     0.01638665
  L.D.lnrealinc_pc L2.D.lnrealinc_pc L.growthrate L2.growthrate L3.growthrate L4.growthrate
1               NA                NA           NA            NA            NA            NA
2               NA                NA           NA            NA            NA            NA
3      -0.05239839                NA  -0.05239839            NA            NA            NA
4      -0.02820903       -0.05239839  -0.02820903   -0.05239839            NA            NA
  D.growthrate L.D.growthrate L2.D.growthrate    L.gini   L2.gini   L3.gini L4.gini
1           NA             NA              NA        NA        NA        NA      NA
2           NA             NA              NA 0.4385357        NA        NA      NA
3   0.02418936             NA              NA 0.4394296 0.4385357        NA      NA
4   0.04459568     0.02418936              NA 0.4513472 0.4394296 0.4385357      NA
        D.gini    L.D.gini   L2.D.gini L.welfbenefits L2.welfbenefits L3.welfbenefits
1           NA          NA          NA             NA              NA              NA
2  0.000893831          NA          NA       258.0002              NA              NA
3  0.011917681 0.000893831          NA       257.9575        258.0002              NA
4 -0.036693663 0.011917681 0.000893831       257.9148        257.9575        258.0002
  L4.welfbenefits D.welfbenefits L.D.welfbenefits L2.D.welfbenefits L.crack_index
1              NA             NA               NA                NA            NA
2              NA    -0.04266732               NA                NA            NA
3              NA    -0.04266732      -0.04266732                NA            NA
4              NA    -0.04266732      -0.04266732       -0.04266732            NA
  L2.crack_index L3.crack_index L4.crack_index D.crack_index L.D.crack_index L2.D.crack_index
1             NA             NA             NA            NA              NA               NA
2             NA             NA             NA            NA              NA               NA
3             NA             NA             NA            NA              NA               NA
4             NA             NA             NA            NA              NA               NA
  L.povertyrate L2.povertyrate L3.povertyrate L4.povertyrate D.povertyrate L.D.povertyrate
1            NA             NA             NA             NA            NA              NA
2         19.00             NA             NA             NA         -0.64              NA
3         18.36          19.00             NA             NA         -0.64           -0.64
4         17.72          18.36             19             NA         -0.64           -0.64
  L2.D.povertyrate
1               NA
2               NA
3               NA
4            -0.64
 [ reached 'max' / getOption("max.print") -- omitted 2 rows ]

> #LOAD REDISTRICTING INFO
> setwd(datadir)

> redf<-read.csv(
+   'redf.csv',
+   stringsAsFactors=F
+ )

> redf$X<-NULL

> ###define the treatment period
> #two dummy variables
> #one for before/after point of treatment
> #one for the group of states that were treated/not treated
> #DinD is the interaction of the two (i.e., treatment period
> #in the treated states)
> t.year<-1990

> t.year<-t.year+1 #because we are using lags of IV's rather than leads of DVs

> beodf$post.t<- as.numeric(beodf$year>t.year)

> treated<-c("AL","FL","GA","LA","MS","NC","SC","TX","VA","NY")

> beodf$t<-as.numeric(beodf$state_alpha2%in%treated)

> beodf$t.post.t<-beodf$t * beodf$post.t

> #we can also interact the treatment variables by 
> #the magnitude of the treatment effect, 
> #which will be the number of new districts created
> redf<-redf[,c("statename","newdistricts_pctalld92")]

> names(redf)<-c("statename","newds")

> #add all other states, and assume all excluded are 0
> allstates<-beodf$statename %>% unique %>% as.character

> sum(redf$statename%in%allstates)==length(redf$statename) #names harmonize
[1] TRUE

> newstates<-allstates[!allstates%in%redf$statename]

> tmpdf<-data.frame(statename=newstates,newds=0)

> redf2<-rbind.fill(redf,tmpdf)

> #add this data to beodf
> beodf<-merge(
+   beodf,
+   redf2,
+   by=c("statename"),
+   all=T
+ )

> #this will be useful, below
> median.redistricting<-median(
+   redf2$newds[redf2$newds>0 & !is.na(redf2$newds)]
+ )

> #add year2, for estimation of year trend
> beodf$year2<-beodf$year-1990 

> #output for graph of beo rep
> tmpvars<-c(
+   "state_alpha2",
+   "year",
+   "beopct_all",
+   "post.t",
+   "t",
+   "t.post.t"
+ )

> tmpdf<-beodf[!is.na(beodf$beopct_all),tmpvars]

> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> write.csv(
+   tmpdf,
+   '03_dind_beodf_dd.csv',
+   row.names=F
+ )

> ########################################################
> ########################################################
> 
> #last, to use the bootstrap procedure, we need to define 
> #a function, getbootstats(), which is written to be passed to boot
> #store<-NA
> getbootstats<-function(x,i,mydf,myformula,b.ols) {  
+   #Statistic function for a block (cluster) bootstrap, written to be
+   #passed to boot(). Re-estimates the model on a resample of whole
+   #states and returns a recentred t-statistic for the t.post.t term.
+   #
+   #Arguments:
+   #  x         - vector of cluster ids (the caller passes the unique
+   #              values of state_alpha2 as boot's `data`)
+   #  i         - indices into x drawn by boot for this replicate
+   #  mydf      - estimation data; must contain a state_alpha2 column
+   #  myformula - model formula to re-estimate with lm()
+   #  b.ols     - t.post.t coefficient from the original sample
+   #Returns:
+   #  abs(b.boot - b.ols) / se.boot, or NA when the t.post.t term is
+   #  missing from the coefficient table for this replicate.
+   #
+   #to debug interactively:
+   #x<-unique(thisdf$state_alpha2)
+   #i<-sample(1:length(x),replace=T)
+   #mydf<-thisdf[these.obs,]
+   #myformula<-thisformula
+   #b.ols<-b.ols
+   
+   #one small df per drawn state (a state drawn twice appears twice).
+   #plain indexing is used rather than subset(), whose non-standard
+   #evaluation would pick up a data column named `x` or `thisi`
+   dfs.list<-lapply(i,function(thisi) {
+     mydf[mydf$state_alpha2%in%x[thisi],,drop=FALSE]
+   })
+   #put this list into one giant df
+   fulldf<-do.call("rbind",dfs.list)
+   #run the desired regression
+   m.boot<-lm(data=fulldf,formula=myformula)
+   #get the desired stats (standard, non-robust vcov)
+   coefs<-coeftest(m.boot,vcov(m.boot))
+   myrow<-coefs[str_detect(row.names(coefs),"t.post.t"),,drop=FALSE]
+   #no matching term for this replicate: report NA so that the caller
+   #can count and drop it
+   if(nrow(myrow)==0) {
+     return(NA_real_)
+   }
+   b.boot<-myrow[1,"Estimate"]
+   b.se.boot<-myrow[1,"Std. Error"]
+   #distance of the bootstrap estimate from the original estimate,
+   #in bootstrap standard errors
+   t.boot<-abs(b.boot-b.ols)/b.se.boot
+   return(t.boot)
+ }

> ########################################################
> ########################################################
> 
> #IDENTIFY VARS
> #get dddf dataset
> setwd(metadir); dir()
 [1] "01_po_qinfo.csv"               "01_po_varsdf.csv"             
 [3] "03_dind_cdf.csv"               "03_dind_ddmodsdf.csv"         
 [5] "03_dind_ddvarsdf.csv"          "03_dind_regvarsdf.csv"        
 [7] "03_dind_robdisplay_EDIT.csv"   "03_dind_robdisplay_EDITED.csv"
 [9] "03_dind_roborder_EDIT.csv"     "03_dind_roborder_EDITED.csv"  
[11] "03_dind_robustness.csv"       

> ddvarsdf<-read.csv(
+   '03_dind_ddvarsdf.csv',
+   stringsAsFactors=F
+ )

> dvs<-ddvarsdf$varname[ddvarsdf$type=="dv"]

> ivs<-ddvarsdf$varname[ddvarsdf$type=="iv"]

> idvars<-c("state_alpha2","year")

> treatment.vars<-c(
+   #standard
+   "post.t",
+   "t",
+   "t.post.t",
+   #for interaction
+   "newds"
+ )

> ########################################################
> ########################################################
> 
> #GATHER MODS
> 
> #loop through mods
> setwd(metadir); dir()
 [1] "01_po_qinfo.csv"               "01_po_varsdf.csv"             
 [3] "03_dind_cdf.csv"               "03_dind_ddmodsdf.csv"         
 [5] "03_dind_ddvarsdf.csv"          "03_dind_regvarsdf.csv"        
 [7] "03_dind_robdisplay_EDIT.csv"   "03_dind_robdisplay_EDITED.csv"
 [9] "03_dind_roborder_EDIT.csv"     "03_dind_roborder_EDITED.csv"  
[11] "03_dind_robustness.csv"       

> ddmodsdf<-read.csv(
+   '03_dind_ddmodsdf.csv',
+   stringsAsFactors=F
+ )

> #full space
> ddmodsdf<-expand.grid(
+   dv=dvs,
+   spec=c(ddmodsdf$spec,'statetrend'),
+   method=c(
+     "normal",
+     "aggregation",
+     "bootstrap"
+   ),
+   sample=c(
+     'full',
+     'jimcrow'
+   ),
+   stringsAsFactors=F
+ ) 

> #identify preferred mod
> tmp<-ddmodsdf$dv%in%c(
+   'officers_pcap',
+   'imprt_t_jur',
+   'welfbenefits'
+ ) &
+   ddmodsdf$spec=='divtrend' &
+   ddmodsdf$method=='normal' &
+   ddmodsdf$sample=='full'

> prefmodsdf<-ddmodsdf[tmp,]

> #don't estimate the whole space
> #loop through each condition
> #keeping all other choices from prefmods
> tmpseq.i<-seq_along(names(prefmodsdf))

> ddmodsdf<-lapply(tmpseq.i,function(i) {
+   #vary one modelling choice at a time: column i takes every value
+   #found in the full grid (ddmodsdf), while all remaining columns
+   #are held at the values used by the preferred models (prefmodsdf)
+   choicenames<-names(prefmodsdf)
+   varied<-choicenames[i]
+   fixed<-choicenames[-i]
+   #values for the varied choice come first, then the fixed ones
+   permlist<-append(
+     list(unique(ddmodsdf[[varied]])),
+     lapply(fixed,function(nm) unique(prefmodsdf[[nm]]))
+   )
+   #all combinations, labelled in the same order as permlist
+   griddf<-expand.grid(permlist,stringsAsFactors = F)
+   names(griddf)<-c(varied,fixed)
+   griddf
+ }) %>% rbind.fill %>% unique

> # #add custom rows
> # newrow<-data.frame(
> #   dv='welfbenefits',
> #   spec='controls',
> #   method='normal'
> # )
> # ddmodsdf<-rbind(ddmodsdf,newrow) %>% unique
> 
> #also estimate the key mods by aggregation and bootstrap
> ddmodsdf$mname<-apply(
+   ddmodsdf,1,paste0,collapse="."
+ )

> ########################################################
> ########################################################
> 
> #GENERATE FORMULAS
> 
> tmpseq.i<-1:nrow(ddmodsdf)

> ddforms<-lapply(tmpseq.i,function(i) {
+   
+   #build the model formula for row i of ddmodsdf from its dv and spec
+   modrow<-ddmodsdf[i,]
+   spec<-modrow$spec
+   dv<-modrow$dv
+   
+   #lhs: first difference of the dv for "diff", the dv itself otherwise
+   lhs<-if(spec=="diff") paste0("D.",dv) else dv
+   
+   #rhs, treatment and identifier terms
+   if(spec=="simple") {
+     #both dummies plus their interaction, no state/year factors
+     ddterms<-paste0(c("post.t","t","t.post.t"),collapse=" + ")
+     idterms<-""
+   } else {
+     #interaction only, alongside state and year factors
+     ddterms<-"t.post.t"
+     idterms<-paste0(
+       c("factor(state_alpha2)","factor(year)"),
+       collapse=" + "
+     )
+   }
+   #"newds" scales the treatment by the share of new districts
+   if(spec=="newds") {
+     ddterms<-"t.post.t:newds"
+   }
+   
+   #rhs, control terms
+   controls<-ddvarsdf$varname[ddvarsdf$class%in%c("control")]
+   nocontrol.specs<-c(
+     "simple","fes","diff","newds",
+     "divtrend","regtrend","statetrend"
+   )
+   if(spec%in%nocontrol.specs) {
+     controlterms<-""
+   } else if(spec=="lags") {
+     #lagged dv only
+     controlterms<-paste0("L.",dv)
+   } else if(spec=="controls") {
+     #first lag of every control variable
+     controlterms<-paste0(paste0("L.",controls),collapse=" + ")
+   }
+   
+   #rhs, trend term; division-by-year is the default, and
+   #"simple" and "fes" get no trend at all
+   divtrend.specs<-c("divtrend","diff","newds","lags","controls")
+   if(spec%in%divtrend.specs) {
+     trendterm<-" + year:division"
+   } else if(spec=="regtrend") {
+     trendterm<-" + year:region"
+   } else if(spec=="statetrend") {
+     trendterm<-" + year2:factor(state_alpha2)"
+   } else {
+     trendterm<-""
+   }
+   controlterms<-paste0(controlterms,trendterm)
+   
+   #assemble the rhs, then tidy up the stray "+" signs left behind
+   #by empty pieces (doubled, trailing, leading)
+   rhs<-paste(ddterms,idterms,controlterms,sep=" + ")
+   rhs<-str_replace_all(rhs,"\\+\\s+\\+","+ ")
+   rhs<-str_replace(rhs,"\\s+\\+\\s+$","")
+   rhs<-str_replace(rhs,"^\\s+\\+\\s+","")
+   
+   #put the formula together
+   paste(lhs,"~",rhs) %>% as.formula
+   
+ })

> names(ddforms)<-ddmodsdf$mname

> ########################################################
> ########################################################
> 
> #GENERATE DF 
> #for each form
> 
> ddsampsdf <- ddmodsdf

> ddsampsdf$sampname<-apply(
+   ddsampsdf,1,paste0,collapse="."
+ )

> tmpseq.i<-1:nrow(ddsampsdf)

> ddsamps<-lapply(tmpseq.i,function(i) {
+   #Build the estimation sample for row i of ddsampsdf.
+   #Keeps rows of beodf that are complete on every variable used by
+   #the formulas for this dv/spec, fall in or before 1996, and are not
+   #in the treatment year; optionally restricts to the Jim Crow states
+   #and/or collapses the data to one row per state and period.
+   #Returns a data.frame.
+   #i<-21
+   print(i) #progress
+   thisrow<-ddsampsdf[i,]
+   thisdv<-thisrow$dv
+   thisspec<-thisrow$spec
+   thismethod<-thisrow$method
+   thissample<-thisrow$sample
+   #this is the core df
+   fulldf<-beodf
+   #these are the cols we want: every variable named in any formula
+   #that shares this dv and spec
+   allforms<-ddforms[ddmodsdf$dv==thisdv & ddmodsdf$spec==thisspec]
+   allvars<-lapply(allforms,all.vars) %>%
+     unlist %>%
+     unique
+   #stop if a formula refers to a column we don't have
+   tmp<-allvars%in%names(fulldf)
+   if(sum(!tmp)>0) {
+     print(allvars[!tmp])
+     stop('dont have allvars')
+   }
+   #this gives us rows
+   #(drop=FALSE keeps a data.frame even if allvars has one element)
+   tmprows<-complete.cases(fulldf[,allvars,drop=FALSE]) &
+     fulldf$year<=1996 & #cut analysis in 1996
+     fulldf$year!=t.year #exclude treatment year
+   if(thissample=='jimcrow') {
+     #limit to the jim crow states
+     #NOTE(review): 'Tennessee' and 'Maryland' were previously spelled
+     #'Tennesee' and 'Marlyand'; this assumes statename uses standard
+     #spellings, in which case both states had been dropped silently
+     tmprows<-tmprows & 
+       fulldf$statename%in%c(
+         ##mulroy and katzenlson, p. 606
+         #(following Eubanks/Fresh..)
+         'Missouri',
+         'Arkansas',
+         'Louisiana',
+         'Oklahoma',
+         'Texas',
+         'Alabama',
+         'Kentucky',
+         'Mississippi',
+         'Tennessee',
+         'Delaware',
+         'Florida',
+         'Georgia',
+         'Maryland',
+         'North Carolina',
+         'South Carolina',
+         'Virginia',
+         'West Virginia',
+         'Arizona',
+         'Kansas',
+         'New Mexico'
+       )
+   }
+   #these are extra vars, kept whether or not a formula uses them
+   idvars<-c(
+     "state_alpha2",
+     "year"
+   )
+   extravars<-c(
+     'post.t'
+   )
+   tmpcols<-c(
+     idvars,
+     extravars,
+     allvars
+   ) %>% unique
+   tmpcols<-tmpcols[tmpcols%in%names(fulldf)]
+   #so this is the df
+   thisdf<-fulldf[tmprows,tmpcols]
+   #but if we are aggregating, aggregate!
+   #one row per state and pre/post period: numeric columns are
+   #averaged, anything else is reduced to its unique value(s)
+   if(thismethod=="aggregation") {
+     tmplist<-list(thisdf$post.t,thisdf$state_alpha2)
+     thisdf<-aggregate(thisdf,by=tmplist,function(x) {
+       #is.numeric() covers both doubles and integers and, unlike
+       #comparing class(x), is safe for columns with several classes
+       if (is.numeric(x)) {
+         y<-mean(x,na.rm=TRUE)
+       } else {
+         y<-unique(x)
+       }
+       return(y)
+     })
+   }
+   #return
+   thisdf
+ })
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
[1] 11
[1] 12
[1] 13
[1] 14
[1] 15
[1] 16
[1] 17
[1] 18
[1] 19
[1] 20
[1] 21
[1] 22
[1] 23
[1] 24
[1] 25
[1] 26
[1] 27
[1] 28
[1] 29
[1] 30
[1] 31
[1] 32
[1] 33
[1] 34
[1] 35
[1] 36
[1] 37
[1] 38
[1] 39

> names(ddsamps)<-ddsampsdf$sampname

> ddsampsinfodf<-lapply(tmpseq.i,function(i) {
+   #one summary row per sample: its name, number of rows,
+   #number of distinct states, and the span of years covered
+   sampdf<-ddsamps[[i]]
+   nstates<-length(unique(sampdf$state_alpha2))
+   yearspan<-paste0(
+     min(sampdf$year),"-",max(sampdf$year)
+   )
+   data.frame(
+     sampname=names(ddsamps)[i],
+     N=nrow(sampdf),
+     N.states=nstates,
+     range=yearspan
+   )
+ }) %>% rbind.fill

> ddsampsdf<-merge(
+   ddsampsdf,
+   ddsampsinfodf,
+   by="sampname"
+ )

> ########################################################
> ########################################################
> 
> #ESTIMATE AND RETURN TARGET COEF
> 
> this.sequence<-seq_along(ddforms)

> tmpoutput<-lapply(this.sequence,function(i) {
+   #Estimate model i and return a data.frame describing the estimate
+   #on the treatment term (t.post.t), tagged with the model name.
+   #Two estimation paths:
+   #  - pooled plm with state-clustered HC1 standard errors, which
+   #    returns a "shortrun" row (plus a simulated "longrun" row for
+   #    the "lags" spec)
+   #  - lm plus a block bootstrap over states, which returns a
+   #    "t.based" row carrying only the estimate and a significance
+   #    class
+   #Relies on objects defined outside this function: ddforms,
+   #ddmodsdf, ddsampsdf, ddsamps, median.redistricting, getbootstats,
+   #and (not visible in this part of the script) reps, boot.reps and
+   #summarize.distribution2.
+   #i<-2
+   #progress
+   print(
+     paste(
+       "Estimating model",
+       i,"of",max(this.sequence)
+     )
+   )
+   #get params
+   thisform<-ddforms[[i]]
+   thisdv<-ddmodsdf$dv[i]
+   thismethod<-ddmodsdf$method[i]
+   this.spec<-ddmodsdf$spec[i]
+   this.sample<-ddmodsdf$sample[i]
+   thismname<-ddmodsdf$mname[i]
+   #look up the sample that was built for this model
+   tmprow<-ddsampsdf$dv==thisdv & 
+     ddsampsdf$method==thismethod &
+     ddsampsdf$spec==this.spec &
+     ddsampsdf$sample==this.sample
+   this.sampname<-ddsampsdf$sampname[tmprow]
+   thisdf<-ddsamps[[this.sampname]]
+   
+   #NOTE(review): the methods in ddmodsdf are "normal", "aggregation"
+   #and "bootstrap"; "agg" never matches, so aggregated samples are
+   #estimated in the bootstrap branch below. confirm this is intended
+   if(thismethod%in%c("normal","agg")) {
+     
+     #if normal, straightforward
+     m<-plm(
+       data=thisdf,
+       formula=thisform,
+       index=c(
+         "state_alpha2",
+         "year"
+       ),
+       model="pooling"
+     )
+     #get coefs, with standard errors clustered by state
+     coefs<-m$coefficients
+     thisvcov<-vcovHC(
+       m,
+       type="HC1",
+       cluster="group"
+     )
+     coefs.tested<-coeftest(m,thisvcov)
+     #multiplier? 
+     #for "newds" the effect is reported at the median share of new
+     #districts among states that gained any
+     if(this.spec=="newds") {
+       multiplier<-median.redistricting
+     } else {
+       multiplier<-1
+     }
+     #pull out coef of interest
+     thevar.regex<-paste0("t.post.t")
+     thisrow<-str_detect(row.names(coefs.tested),thevar.regex)
+     term<-row.names(coefs.tested)[thisrow]
+     est<-coefs.tested[thisrow,"Estimate"]
+     se<-coefs.tested[thisrow,"Std. Error"]
+     tval.col<-str_detect(colnames(coefs.tested),"t.value")
+     t<-coefs.tested[thisrow,tval.col]
+     pval<-coefs.tested[thisrow,"Pr(>|t|)"]
+     #rescale by the multiplier
+     est<-est*multiplier
+     se<-se*multiplier
+     #now compute the 95% interval
+     est.min<-est-1.96*se
+     est.max<-est+1.96*se
+     #returnrow
+     returnrow<-data.frame(
+       iv="t.post.t",
+       term,
+       mu=est,
+       mu.min=est.min,
+       mu.max=est.max,
+       se,
+       pval,
+       t,
+       stringsAsFactors=F
+     )
+     returnrow$type<-"shortrun"
+     if(this.spec=="lags") {
+       #add longrun, which is pref
+       #all ests involving this var
+       tmprows<-str_detect(names(coefs),"t.post.t")
+       iv.terms<-names(coefs)[tmprows]
+       #all ests with dv
+       tmprows<-str_detect(names(coefs),thisdv)
+       lagdv.terms<-names(coefs)[tmprows]
+       #get the longrun estimate
+       means<-c(
+         coefs[lagdv.terms],
+         coefs[iv.terms]
+       )
+       #get the vcov matrix
+       new.vcov<-thisvcov #from above
+       rows<-row.names(new.vcov)%in%c(lagdv.terms,iv.terms)
+       cols<-colnames(new.vcov)%in%c(lagdv.terms,iv.terms)
+       vcov.useme<-new.vcov[rows,cols]
+       #vcov needs to be ordered in the same way as the means
+       new.order<-match(names(means),row.names(vcov.useme))
+       vcov.useme<-vcov.useme[new.order,new.order]
+       #sample from the multivariate distribution defined here
+       draws<-mvrnorm(n=reps,mu=means,Sigma=vcov.useme)
+       #long-run multiplier: sum(iv coefs) / (1 - sum(lagged dv coefs))
+       numerator<-apply(draws[,iv.terms] %>% as.matrix,1,sum) #sum iv terms
+       denominator<- 1 - apply(draws[,lagdv.terms] %>% as.matrix,1,sum) #sum dv terms
+       lrm.distribution<-numerator/denominator
+       #put this distribution in meaningful units
+       lrm.distribution<-lrm.distribution * multiplier
+       returnrow_tmp<-summarize.distribution2(lrm.distribution)
+       #return this info (but not w/ lag)
+       returnrow_tmp$iv<-"t.post.t"
+       returnrow_tmp$type<-"longrun"
+       returnrow<-rbind.fill(
+         returnrow,
+         returnrow_tmp
+       )
+     }
+   } else {
+     
+     #if this is a bootstrapped procedure, start here
+     m.ols<-lm(
+       data=thisdf,
+       formula=thisform,
+       na.action=na.exclude
+     )
+     #na.exclude pads residuals with NA, so this flags the rows that
+     #actually entered the regression
+     these.obs<-!is.na(residuals(m.ols))
+     #use standard vcov; duflo (2004) does not say robust SE's
+     coefs<-coeftest(
+       m.ols,
+       vcov(m.ols)
+     ) 
+     tmprow<-str_detect(row.names(coefs),"t.post.t")
+     #the row mask is recycled over the whole coefficient matrix;
+     #with a single matching row this yields that row's
+     #(estimate, se, t, p) in order
+     thiscoef<-coefs[tmprow]
+     t.ols<-abs(thiscoef[3])
+     b.ols<-thiscoef[1]
+     #now we need to block-bootstrap
+     #and examine the distributions
+     t.distribution<-boot(
+       data=unique(thisdf$state_alpha2[these.obs]),
+       statistic=getbootstats,
+       R=boot.reps,
+       mydf=thisdf[these.obs,],
+       myformula=thisform,
+       b.ols=b.ols
+     )
+     tmp<-t.distribution$t
+     #flag replicates that failed to produce a statistic; the message
+     #uses the model name (it used to reference an undefined object,
+     #which turned this warning into an error)
+     if(sum(is.na(tmp))>0) {
+       print(paste0(thismname," yields some NAs"))
+     }
+     #critical values from the bootstrap distribution
+     t.thresh<-quantile(tmp,c(0.99,0.95,0.90),na.rm=T)
+     
+     ###THIS GOES TO PVAL.CLASS
+     if(t.ols>t.thresh[1]) {
+       pval.class<-"at alpha=0.01"
+     } else if(t.ols>t.thresh[2]) {
+       pval.class<-"at alpha=0.05"
+     } else if(t.ols>t.thresh[3]) {
+       pval.class<-"at alpha=0.10"
+     } else {
+       pval.class<-"not sig"
+     }
+     
+     #only other thing to return is b.ols
+     #which we report as the estimate
+     returnrow<-data.frame(
+       iv="t.post.t",
+       term="t.post.t",
+       type="t.based",
+       mu=b.ols,
+       pval.class=pval.class,
+       stringsAsFactors=F
+     )
+   }
+   
+   #returnrow
+   returnrow$mname<-thismname
+   
+   #return 
+   returnrow
+   
+ }) %>% rbind.fill
[1] "Estimating model 1 of 39"
[1] "Estimating model 2 of 39"
NaNs produced 
[1] "Estimating model 3 of 39"
[1] "Estimating model 4 of 39"
NaNs produced 
[1] "Estimating model 5 of 39"
[1] "Estimating model 6 of 39"
NaNs produced 
[1] "Estimating model 7 of 39"
[1] "Estimating model 8 of 39"
[1] "Estimating model 9 of 39"
NaNs produced 
[1] "Estimating model 10 of 39"
[1] "Estimating model 11 of 39"
NaNs produced 
[1] "Estimating model 12 of 39"
[1] "Estimating model 13 of 39"
[1] "Estimating model 14 of 39"
NaNs produced 
[1] "Estimating model 15 of 39"
[1] "Estimating model 16 of 39"
[1] "Estimating model 17 of 39"
NaNs produced 
[1] "Estimating model 18 of 39"
NaNs produced 
[1] "Estimating model 19 of 39"
NaNs produced 
[1] "Estimating model 20 of 39"
[1] "Estimating model 21 of 39"
NaNs produced 
[1] "Estimating model 22 of 39"
NaNs produced 
[1] "Estimating model 23 of 39"
[1] "Estimating model 24 of 39"
[1] "Estimating model 25 of 39"
NaNs produced 
[1] "Estimating model 26 of 39"
NaNs produced 
[1] "Estimating model 27 of 39"
NaNs produced 
[1] "Estimating model 28 of 39"
[1] "Estimating model 29 of 39"
NaNs produced 
[1] "Estimating model 30 of 39"
NaNs produced 
[1] "Estimating model 31 of 39"
[1] "Estimating model 32 of 39"
[1] "Estimating model 33 of 39"
[1] "Estimating model 34 of 39"
[1] "Estimating model 35 of 39"
[1] "Estimating model 36 of 39"
[1] "Estimating model 37 of 39"
[1] "Estimating model 38 of 39"
NaNs produced 
[1] "Estimating model 39 of 39"

> #put together in df
> ddestsdf<-rbind.fill(tmpoutput)

> ddestsdf
         iv           term           mu        mu.min      mu.max          se         pval
1  t.post.t       t.post.t  51.75794035 -3.664135e+01  140.157227 45.10167676 2.513345e-01
2  t.post.t       t.post.t 121.60406956  3.130702e+01  211.901123 46.06992501 8.449572e-03
3  t.post.t       t.post.t  20.07484710  1.867028e+00   38.282666  9.28970358 3.091940e-02
4  t.post.t       t.post.t  20.28804829  4.267176e+00   36.308921  8.17391454 1.318010e-02
5  t.post.t       t.post.t   0.07071604 -9.344492e-02    0.234877  0.08375559 3.986347e-01
6  t.post.t       t.post.t  -0.01978549 -1.938120e-01    0.154241  0.08878902 8.236940e-01
7  t.post.t       t.post.t  90.04753888  3.066505e+01  149.430031 30.29719010 3.111030e-03
8  t.post.t       t.post.t 162.32270896  7.887164e+01  245.773778 42.57707630 1.449700e-04
9  t.post.t       t.post.t 162.27010713  7.627335e+01  248.266861 43.87589499 2.279630e-04
10 t.post.t       t.post.t  12.09736294 -2.589799e+00   26.784525  7.49345007 1.068190e-01
11 t.post.t t.post.t:newds  69.32037157  1.044831e+01  128.192434 30.03676651 2.126208e-02
12 t.post.t       t.post.t  15.33478086 -1.533175e+00   32.202737  8.60610003 7.513614e-02
13 t.post.t           <NA> 476.44584319 -1.408885e+03 2844.921184          NA           NA
14 t.post.t       t.post.t 111.15132932  1.319344e+01  209.109218 49.97851482 2.637278e-02
15 t.post.t       t.post.t  95.45091012  3.150259e+01  159.399231 32.62669444 3.513950e-03
16 t.post.t       t.post.t  13.34264400  1.118410e+00   25.566878  6.23685403 3.285071e-02
17 t.post.t       t.post.t  34.51538293  1.916392e+01   49.866843  7.83237766 1.132618e-05
18 t.post.t       t.post.t  34.51538293  1.872989e+01   50.300871  8.05382054 1.959857e-05
19 t.post.t       t.post.t  -0.47485493 -4.349326e+00    3.399616  1.97677100 8.102097e-01
20 t.post.t t.post.t:newds  20.28511474  8.271106e+00   32.299124  6.12959644 9.688783e-04
21 t.post.t       t.post.t   4.73564063 -2.545218e-01    9.725803  2.54600127 6.316802e-02
22 t.post.t           <NA>  18.51687890 -1.000162e+00   33.306029          NA           NA
23 t.post.t       t.post.t  20.05634838  2.798219e+00   37.314477  8.80516784 2.291490e-02
24 t.post.t       t.post.t   8.67642894 -2.078035e+00   19.430893  5.48697139 1.140728e-01
25 t.post.t       t.post.t   3.60043089 -6.607055e+00   13.807916  5.20790079 4.896676e-01
26 t.post.t       t.post.t  30.81031921  1.141899e+01   50.201649  9.89353573 1.874485e-03
27 t.post.t       t.post.t  30.81031921  1.095291e+01   50.667729 10.13133154 2.394003e-03
28 t.post.t       t.post.t   0.92493867 -2.452400e+00    4.302278  1.72313215 5.915096e-01
29 t.post.t t.post.t:newds   8.14326647 -4.695498e+00   20.982031  6.55038989 2.140299e-01
30 t.post.t       t.post.t   1.67899423 -1.663765e+00    5.021754  1.70548962 3.250634e-01
31 t.post.t           <NA>  44.33507432 -4.530509e+01  135.887543          NA           NA
32 t.post.t       t.post.t  12.67457487 -7.091426e+00   32.440576 10.08469440 2.090092e-01
33 t.post.t       t.post.t  13.38688443 -1.241502e+00   28.015271  7.46346272 7.305500e-02
34 t.post.t       t.post.t  96.55925244            NA          NA          NA           NA
35 t.post.t       t.post.t 123.07978814            NA          NA          NA           NA
36 t.post.t       t.post.t  20.07484710            NA          NA          NA           NA
37 t.post.t       t.post.t  18.37307040            NA          NA          NA           NA
38 t.post.t       t.post.t  20.28804829            NA          NA          NA           NA
39 t.post.t       t.post.t  22.09967995            NA          NA          NA           NA
40 t.post.t       t.post.t 201.75604390  1.058548e+02  297.657245 48.92918407 4.845233e-05
41 t.post.t       t.post.t  19.02191490 -5.118692e+00   43.162522 12.31663630 1.234280e-01
42 t.post.t       t.post.t  15.28388504 -6.944322e+00   37.512092 11.34092183 1.784546e-01
            t     type                                   mname    pval.class
1   1.1475835 shortrun        incrt_t_jur.divtrend.normal.full          <NA>
2   2.6395543 shortrun        imprt_t_jur.divtrend.normal.full          <NA>
3   2.1609782 shortrun      officers_pcap.divtrend.normal.full          <NA>
4   2.4820480 shortrun       welfbenefits.divtrend.normal.full          <NA>
5   0.8443143 shortrun lncorrections_pcap.divtrend.normal.full          <NA>
6  -0.2228372 shortrun    lnpolicesp_pcap.divtrend.normal.full          <NA>
7   2.9721416 shortrun        imprt_t_jur.controls.normal.full          <NA>
8   3.8124438 shortrun          imprt_t_jur.simple.normal.full          <NA>
9   3.6983885 shortrun             imprt_t_jur.fes.normal.full          <NA>
10  1.6143916 shortrun            imprt_t_jur.diff.normal.full          <NA>
11  2.3078507 shortrun           imprt_t_jur.newds.normal.full          <NA>
12  1.7818502 shortrun            imprt_t_jur.lags.normal.full          <NA>
13         NA  longrun            imprt_t_jur.lags.normal.full       not sig
14  2.2239822 shortrun        imprt_t_jur.regtrend.normal.full          <NA>
15  2.9255465 shortrun      imprt_t_jur.statetrend.normal.full          <NA>
16  2.1393228 shortrun      officers_pcap.controls.normal.full          <NA>
17  4.4067567 shortrun        officers_pcap.simple.normal.full          <NA>
18  4.2855913 shortrun           officers_pcap.fes.normal.full          <NA>
19 -0.2402175 shortrun          officers_pcap.diff.normal.full          <NA>
20  3.3093720 shortrun         officers_pcap.newds.normal.full          <NA>
21  1.8600307 shortrun          officers_pcap.lags.normal.full          <NA>
22         NA  longrun          officers_pcap.lags.normal.full at alpha=0.10
23  2.2777929 shortrun      officers_pcap.regtrend.normal.full          <NA>
24  1.5812783 shortrun    officers_pcap.statetrend.normal.full          <NA>
25  0.6913401 shortrun       welfbenefits.controls.normal.full          <NA>
26  3.1141869 shortrun         welfbenefits.simple.normal.full          <NA>
27  3.0410928 shortrun            welfbenefits.fes.normal.full          <NA>
28  0.5367776 shortrun           welfbenefits.diff.normal.full          <NA>
29  1.2431728 shortrun          welfbenefits.newds.normal.full          <NA>
30  0.9844646 shortrun           welfbenefits.lags.normal.full          <NA>
31         NA  longrun           welfbenefits.lags.normal.full       not sig
32  1.2568130 shortrun       welfbenefits.regtrend.normal.full          <NA>
33  1.7936560 shortrun     welfbenefits.statetrend.normal.full          <NA>
34         NA  t.based   imprt_t_jur.divtrend.aggregation.full at alpha=0.10
35         NA  t.based     imprt_t_jur.divtrend.bootstrap.full at alpha=0.01
36         NA  t.based officers_pcap.divtrend.aggregation.full at alpha=0.01
37         NA  t.based   officers_pcap.divtrend.bootstrap.full at alpha=0.01
38         NA  t.based  welfbenefits.divtrend.aggregation.full at alpha=0.01
39         NA  t.based    welfbenefits.divtrend.bootstrap.full at alpha=0.01
40  4.1234296 shortrun     imprt_t_jur.divtrend.normal.jimcrow          <NA>
41  1.5444083 shortrun   officers_pcap.divtrend.normal.jimcrow          <NA>
42  1.3476757 shortrun    welfbenefits.divtrend.normal.jimcrow          <NA>

> #classify the shortrun pvals into pval class
> tmp<-ddestsdf$type=="shortrun"

> ddestsdf$pval.class[ddestsdf$pval<0.01 & tmp]<-"at alpha=0.01"

> ddestsdf$pval.class[ddestsdf$pval>=0.01 & ddestsdf$pval<0.05 & tmp]<-"at alpha=0.05"

> ddestsdf$pval.class[ddestsdf$pval>=0.05 & ddestsdf$pval<0.10 & tmp]<-"at alpha=0.10"

> ddestsdf$pval.class[ddestsdf$pval>=0.10 & tmp]<-"not sig"

> tmp<-is.na(ddestsdf$pval.class)

> if(sum(tmp)>0)
+   stop()

> ########################################################
> ########################################################
> 
> #MERGE
> ddfinaldf<-merge(
+   ddestsdf,
+   ddmodsdf,
+   by="mname"
+ ) %>% unique

> table(ddestsdf$mname)

       imprt_t_jur.controls.normal.full            imprt_t_jur.diff.normal.full 
                                      1                                       1 
  imprt_t_jur.divtrend.aggregation.full     imprt_t_jur.divtrend.bootstrap.full 
                                      1                                       1 
       imprt_t_jur.divtrend.normal.full     imprt_t_jur.divtrend.normal.jimcrow 
                                      1                                       1 
            imprt_t_jur.fes.normal.full            imprt_t_jur.lags.normal.full 
                                      1                                       2 
          imprt_t_jur.newds.normal.full        imprt_t_jur.regtrend.normal.full 
                                      1                                       1 
         imprt_t_jur.simple.normal.full      imprt_t_jur.statetrend.normal.full 
                                      1                                       1 
       incrt_t_jur.divtrend.normal.full lncorrections_pcap.divtrend.normal.full 
                                      1                                       1 
   lnpolicesp_pcap.divtrend.normal.full      officers_pcap.controls.normal.full 
                                      1                                       1 
         officers_pcap.diff.normal.full officers_pcap.divtrend.aggregation.full 
                                      1                                       1 
  officers_pcap.divtrend.bootstrap.full      officers_pcap.divtrend.normal.full 
                                      1                                       1 
  officers_pcap.divtrend.normal.jimcrow           officers_pcap.fes.normal.full 
                                      1                                       1 
         officers_pcap.lags.normal.full         officers_pcap.newds.normal.full 
                                      2                                       1 
     officers_pcap.regtrend.normal.full        officers_pcap.simple.normal.full 
                                      1                                       1 
   officers_pcap.statetrend.normal.full       welfbenefits.controls.normal.full 
                                      1                                       1 
          welfbenefits.diff.normal.full  welfbenefits.divtrend.aggregation.full 
                                      1                                       1 
   welfbenefits.divtrend.bootstrap.full       welfbenefits.divtrend.normal.full 
                                      1                                       1 
   welfbenefits.divtrend.normal.jimcrow            welfbenefits.fes.normal.full 
                                      1                                       1 
          welfbenefits.lags.normal.full          welfbenefits.newds.normal.full 
                                      2                                       1 
      welfbenefits.regtrend.normal.full         welfbenefits.simple.normal.full 
                                      1                                       1 
    welfbenefits.statetrend.normal.full 
                                      1 

> table(ddmodsdf$mname)

       imprt_t_jur.controls.normal.full            imprt_t_jur.diff.normal.full 
                                      1                                       1 
  imprt_t_jur.divtrend.aggregation.full     imprt_t_jur.divtrend.bootstrap.full 
                                      1                                       1 
       imprt_t_jur.divtrend.normal.full     imprt_t_jur.divtrend.normal.jimcrow 
                                      1                                       1 
            imprt_t_jur.fes.normal.full            imprt_t_jur.lags.normal.full 
                                      1                                       1 
          imprt_t_jur.newds.normal.full        imprt_t_jur.regtrend.normal.full 
                                      1                                       1 
         imprt_t_jur.simple.normal.full      imprt_t_jur.statetrend.normal.full 
                                      1                                       1 
       incrt_t_jur.divtrend.normal.full lncorrections_pcap.divtrend.normal.full 
                                      1                                       1 
   lnpolicesp_pcap.divtrend.normal.full      officers_pcap.controls.normal.full 
                                      1                                       1 
         officers_pcap.diff.normal.full officers_pcap.divtrend.aggregation.full 
                                      1                                       1 
  officers_pcap.divtrend.bootstrap.full      officers_pcap.divtrend.normal.full 
                                      1                                       1 
  officers_pcap.divtrend.normal.jimcrow           officers_pcap.fes.normal.full 
                                      1                                       1 
         officers_pcap.lags.normal.full         officers_pcap.newds.normal.full 
                                      1                                       1 
     officers_pcap.regtrend.normal.full        officers_pcap.simple.normal.full 
                                      1                                       1 
   officers_pcap.statetrend.normal.full       welfbenefits.controls.normal.full 
                                      1                                       1 
          welfbenefits.diff.normal.full  welfbenefits.divtrend.aggregation.full 
                                      1                                       1 
   welfbenefits.divtrend.bootstrap.full       welfbenefits.divtrend.normal.full 
                                      1                                       1 
   welfbenefits.divtrend.normal.jimcrow            welfbenefits.fes.normal.full 
                                      1                                       1 
          welfbenefits.lags.normal.full          welfbenefits.newds.normal.full 
                                      1                                       1 
      welfbenefits.regtrend.normal.full         welfbenefits.simple.normal.full 
                                      1                                       1 
    welfbenefits.statetrend.normal.full 
                                      1 

> ddmodsdf$mname%in%ddfinaldf$mname
 [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[19] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[37] TRUE TRUE TRUE

> #standardize
> tmp<-sapply(dvs,function(thisdv) {
+   #thisdv<-dvs[1]
+   tmp<-ddsampsdf$dv==thisdv &
+     ddsampsdf$method=="normal" &
+     ddsampsdf$spec=='divtrend' &
+     ddsampsdf$sample=='full'
+   this.sampname<-ddsampsdf$sampname[tmp]
+   thisdf<-ddsamps[[this.sampname]]
+   tapply(
+     thisdf[[thisdv]],
+     thisdf$state_alpha2,
+     sd,
+     na.rm=T
+   ) %>% mean(na.rm=T)
+ })

> ddsdsdf<-data.frame(
+   dv=dvs,
+   dvsd=unname(tmp),
+   stringsAsFactors=F
+ )

> #merge it into plotdf
> ddfinaldf<-merge(
+   ddfinaldf,
+   ddsdsdf,
+   by="dv"
+ )

> ddfinaldf$musd<-ddfinaldf$mu/ddfinaldf$dvsd

> ddfinaldf$musd.max<-ddfinaldf$mu.max/ddfinaldf$dvsd

> ddfinaldf$musd.min<-ddfinaldf$mu.min/ddfinaldf$dvsd

> ########################################################
> ########################################################
> 
> #save out
> setwd(filesdir)

> write.csv(
+   ddfinaldf,
+   '03_dind_resultsdf.csv',
+   row.names=F
+ )
[1] "######"
[1] "Running:"
[1] "02_regmods.R"

> #########################################################
> #########################################################
> 
> #clear workspace
> rm(list=ls())

> #load packages
> require(stringr)

> require(plyr)

> require(dplyr)

> require(zoo)

> require(data.table)

> require(tidyr)

> require(rprojroot)

> #extras
> require(haven)

> require(readr)

> require(plm)

> #set dirs
> rootdir<-find_root(
+   criterion=has_file('_rapol.Rproj')
+ )

> codedir<-file.path(rootdir,"code")

> setwd(codedir); dir()
[1] "01_publicopinion" "02_voting"        "03_dind"          "dirs.R"          
[5] "runeverything.R" 

> source('dirs.R')

> #load helper functions
> setwd(dcodedir)

> source('beofunctions.R')

> source('beofunctions2.R')

> #set seed
> set.seed(23)

> reps<-1000

> #########################################################
> #########################################################
> 
> #LOAD
> setwd(datadir); dir()
 [1] "abcdfs.csv"                                 "All_Bills(1947-2017).csv"                  
 [3] "american_violence_data_20241016_201046.csv" "anesdf.csv"                                
 [5] "beodf.csv"                                  "beodf5.csv"                                
 [7] "billsdf_revised.csv"                        "cbcdf.csv"                                 
 [9] "cbsdfs.csv"                                 "citecount.csv"                             
[11] "congressmen_list.csv"                       "conventional.bib"                          
[13] "cpopdf.csv"                                 "Ethnic_Collect_Action.dta"                 
[15] "fulldf_bills.csv"                           "gallupdfs.csv"                             
[17] "gssdf.csv"                                  "Hall_votes.csv"                            
[19] "histpundf_national_220328.csv"              "housevotes_handcoded_revised.csv"          
[21] "HSall_members.csv"                          "HSall_rollcalls_withissues.csv"            
[23] "incrates_subnationalstate.csv"              "lbj1964.csv"                               
[25] "nbclatdfs.csv"                              "prezdf.csv"                                
[27] "questions_fortex.txt"                       "race_riot.dta"                             
[29] "redf.csv"                                   "roperdfs.csv"                              
[31] "tab_longviolence.csv"                       "timedfs.csv"                               
[33] "votes"                                     

> beodfraw<-read.csv(
+   'beodf.csv',
+   stringsAsFactors=F
+ )

> #########################################################
> #########################################################
> 
> #SET UP REG
> 
> #dv
> dvs<-c(
+   "incrt_t_jur",
+   "imprt_t_jur",
+   "officers_pcap"
+ )

> #dvlags
> dvlags<-c(
+   "onelagdv",
+   "twolagdv",
+   "fourlagdv"
+ )

> #spec
> ivspec<-c(
+   "bivariate",
+   "onelagivs",
+   "twolagivs"
+ )

> tmp<-varnamesdf$class%in%c(
+   "beo",
+   "control"
+ )

> beovars<-
+   varnamesdf$varname[tmp & varnamesdf$class=="beo"]

> controls<-
+   varnamesdf$varname[tmp & varnamesdf$class=="control"]

> specslist<-list(
+   ###bivariate
+   bivariate=list(
+     main=paste0("L.",beovars),
+     other=paste0("")
+   ),
+   ###1 lag
+   onelagivs=list(
+     main=paste0("L.",beovars),
+     other=paste0("L.",controls)
+   ),
+   ###2 lag
+   twolagivs=list(
+     main=c(
+       paste0("L.",beovars),
+       paste0("L2.",beovars)
+     ),
+     other=c(
+       paste0("L.",controls),
+       paste0("L2.",controls)
+     )
+   )
+ )

> #lnrealincpc is collinear at multilags, so remove 
> tmp<-specslist$twolagivs$other=="L2.lnrealinc_pc"

> specslist$twolagivs$other[tmp]<-""

> #########################################################
> #########################################################
> 
> #GET FORMS
> 
> modsdf<-expand.grid(
+   dv=dvs,
+   dvlag=dvlags,
+   ivspec=ivspec,
+   stringsAsFactors=F
+ )

> modsdf$mname<-apply(
+   modsdf,1,paste0,collapse="."
+ )

> tmpseq.i<-1:nrow(modsdf)

> forms<-lapply(tmpseq.i,function(i) {
+   #i<-1
+   #get params
+   thisrow<-modsdf[i,]
+   thisdv<-thisrow$dv
+   ####FORMULA
+   ##LHS
+   lhs<-thisdv
+   ##RHS
+   #year fe
+   thisfe.yr<-"factor(year)"
+   #lagdv
+   if(thisrow$dvlag=="onelagdv") {
+     thislagdv<-paste0("L.",thisrow$dv)
+   } else if(thisrow$dvlag=="twolagdv") {
+     thislagdv<-paste0(c("L.","L2."),thisrow$dv) %>%
+       paste0(collapse=" + ")
+   } else if(thisrow$dvlag=="fourlagdv") {
+     thislagdv<-paste0(c("L.","L2.","L3.","L4."),thisrow$dv) %>%
+       paste0(collapse=" + ")
+   }
+   #ivs
+   mainiv<-specslist[[thisrow$ivspec]]$main %>% 
+     paste0(collapse=" + ")
+   otherivs<-specslist[[thisrow$ivspec]]$other %>%
+     paste0(collapse=" + ")
+   #TOGETHER
+   rhs<-paste(
+     thisfe.yr,
+     thislagdv,
+     mainiv,
+     otherivs,
+     sep=" + "
+   )
+   #get rid of extra + signs
+   rhs<-str_replace_all(rhs,"\\+\\s+\\+","+ ") %>%
+     str_replace("\\s+\\+\\s+$","") %>%
+     str_replace("^\\s+\\+\\s+","")
+   #RETURN
+   #put the formula togeehter
+   thisform<-paste(
+     lhs,"~",rhs
+   ) %>% as.formula
+   thisform
+ })

> names(forms)<-modsdf$mname

> #########################################################
> #########################################################
> 
> #GET SAMPS
> #each dv has a sample
> sampsdf<-expand.grid(
+   dv=unique(modsdf$dv),
+   stringsAsFactors=F
+ )

> sampsdf$sampname<-sampsdf$dv

> tmpseq.i<-1:nrow(sampsdf)

> samps<-lapply(tmpseq.i,function(i) {
+   #i<-2
+   thisdv<-sampsdf$dv[i]
+   #this is the df
+   fulldf<-beodfraw
+   #these are the cols we want
+   allforms<-forms[modsdf$dv==thisdv]
+   allvars<-lapply(allforms,all.vars) %>%
+     unlist %>%
+     unique
+   #this gives us rows
+   tmprows<-complete.cases(fulldf[,allvars])
+   #these are extra vars
+   idvars<-c(
+     "state_alpha2",
+     "year"
+   )
+   extravars<-c(
+     #####
+   )
+   tmpcols<-c(
+     idvars,
+     extravars,
+     allvars
+   ) %>% unique
+   tmpcols<-tmpcols[tmpcols%in%names(fulldf)]
+   #so this is the df
+   thisdf<-fulldf[tmprows,tmpcols]
+   #return
+   thisdf
+ })

> names(samps)<-sampsdf$sampname

> sampinfodf<-lapply(tmpseq.i,function(i) {
+   #i<-1
+   tmpdf<-samps[[i]]
+   data.frame(
+     N=nrow(tmpdf),
+     N.states=length(unique(tmpdf$state_alpha2)),
+     range=paste0(
+       min(tmpdf$year),"-",max(tmpdf$year)
+     )
+   )
+ }) %>% rbind.fill

> #sampsdf
> sampsdf<-cbind(
+   sampsdf,
+   sampinfodf
+ )

> sampsdf
             dv      sampname    N N.states     range
1   incrt_t_jur   incrt_t_jur 1802       49 1972-2008
2   imprt_t_jur   imprt_t_jur 1504       43 1974-2008
3 officers_pcap officers_pcap 1764       49 1973-2008

> #########################################################
> #########################################################
> 
> #GET SDS
> #all ivs, dvs
> #use incrt sample
> 
> allvs<-c(
+   dvs,
+   beovars,
+   controls
+ )

> sdsdf<-lapply(allvs,function(thisv) {
+   #thisv<-"beopct_all"
+   if(thisv%in%dvs) {
+     thisdf<-samps[[thisv]] 
+     myv<-thisv
+   } else {
+     thisdf<-samps$incrt_t_jur
+     myv<-paste0("L.",thisv)
+   }
+   print(thisv)
+   isdummy<-F
+   avg<-mean(thisdf[[myv]],na.rm=T)
+   if(isdummy) {
+     sd<-rng<-1
+   } else {
+     sd<-tapply(
+       thisdf[[myv]],
+       thisdf$state_alpha2,
+       sd,na.rm=T
+     ) %>% mean(na.rm=T) 
+     rng<-tapply(
+       thisdf[[myv]],
+       thisdf$state_alpha2,
+       function(x) {
+         diff(
+           quantile(x,c(0.2,0.8),na.rm=T)
+         ) %>% abs
+       } 
+     ) %>% mean(na.rm=T)
+   }
+   data.frame(
+     var=thisv,
+     avg=avg,
+     sd=sd,
+     range=rng,
+     stringsAsFactors=F
+   )
+ }) %>% rbind.fill
[1] "incrt_t_jur"
[1] "imprt_t_jur"
[1] "officers_pcap"
[1] "beopct_all"
[1] "pct_blackpop"
[1] "violent_crt"
[1] "all_allraces_loess"
[1] "demcontrol.klarner"
[1] "txtot_pc"
[1] "unemprate"
[1] "lnrealinc_pc"
[1] "growthrate"
[1] "gini"

> #########################################################
> #########################################################
> 
> #RUN REGS
> this.sequence<-seq_along(forms)

> mods<-lapply(this.sequence,function(i) {
+   #i<-1
+   #progress
+   print(
+     paste(
+       "Estimating model",
+       i,"of",max(this.sequence)
+     )
+   )
+   #get params
+   thisform<-forms[[i]]
+   thisdv<-modsdf$dv[i]
+   thisdf<-samps[[thisdv]]
+   #estimate
+   m.tmp<-plm(
+     data=thisdf,
+     formula=thisform,
+     index=c(
+       "state_alpha2",
+       "year"
+     ),
+     model="within"
+   )
+ })
[1] "Estimating model 1 of 27"
[1] "Estimating model 2 of 27"
[1] "Estimating model 3 of 27"
[1] "Estimating model 4 of 27"
[1] "Estimating model 5 of 27"
[1] "Estimating model 6 of 27"
[1] "Estimating model 7 of 27"
[1] "Estimating model 8 of 27"
[1] "Estimating model 9 of 27"
[1] "Estimating model 10 of 27"
[1] "Estimating model 11 of 27"
[1] "Estimating model 12 of 27"
[1] "Estimating model 13 of 27"
[1] "Estimating model 14 of 27"
[1] "Estimating model 15 of 27"
[1] "Estimating model 16 of 27"
[1] "Estimating model 17 of 27"
[1] "Estimating model 18 of 27"
[1] "Estimating model 19 of 27"
[1] "Estimating model 20 of 27"
[1] "Estimating model 21 of 27"
[1] "Estimating model 22 of 27"
[1] "Estimating model 23 of 27"
[1] "Estimating model 24 of 27"
[1] "Estimating model 25 of 27"
[1] "Estimating model 26 of 27"
[1] "Estimating model 27 of 27"

> names(mods)<-modsdf$mname

> #########################################################
> #########################################################
> 
> #GET ESTS
> 
> #load lmtest
> require(lmtest)

> this.sequence<-seq_along(mods)

> tmpoutput<-lapply(this.sequence,function(i) {
+   #i<-1
+   #progress
+   print(
+     paste(
+       "Getting results from model",
+       i,"of",max(this.sequence)
+     )
+   )
+   #get params
+   m<-mods[[i]]
+   thismname<-modsdf$mname[i]
+   thisdv<-modsdf$dv[i]
+   
+   #get robustvcov coefs
+   coefs<-m$coefficients
+   thisvcov<-vcovHC(
+     m,
+     type="HC1",
+     cluster="group"
+   )
+   coefs.tested<-coeftest(m,thisvcov)
+   
+   #SHORT-RUN
+   #all ivs, and lag dvs
+   print("SR")
+   shortrunvars<-c(thisdv,beovars,controls)
+   sr.sequence<-seq_along(shortrunvars)
+   shortrundf<-lapply(sr.sequence,function(j) {
+     #j<-2
+     #print(j)
+     #get params
+     thisiv<-shortrunvars[j]
+     # #if this is the dv, no multiply
+     if(thisiv==thisdv) {
+       thisiv.sd<-1
+     } else {
+       thisiv.sd<-sdsdf$sd[sdsdf$var==thisiv]
+     }
+     if(length(thisiv.sd)!=1) stop("SD missing.")
+     #get the var(s)
+     thevar.regex<-paste0(
+       "^(L([0-9]+)?\\.)?(D([0-9]+)?\\.)?",
+       thisiv,
+       "$"
+     )
+     thisrow<-str_detect(row.names(coefs.tested),thevar.regex)
+     notvar.regex<-paste0("X",thisiv)
+     thisrow<-thisrow & !str_detect(row.names(coefs.tested),notvar.regex)
+     #if(sum(thisrow)>1)
+     #stop(paste(thisiv,"is matching >1 terms"))
+     #but don't match square terms
+     #thisrow<-thisrow & !str_detect(row.names(coefs.tested),paste0(thisiv,"2"))
+     #this gives term
+     term<-row.names(coefs.tested)[thisrow]
+     est<-coefs.tested[thisrow,"Estimate"]
+     se<-coefs.tested[thisrow,"Std. Error"]
+     tval.col<-str_detect(colnames(coefs.tested),"t.value")
+     t<-coefs.tested[thisrow,tval.col]
+     pval<-coefs.tested[thisrow,"Pr(>|t|)"]
+     #multiply by iv.sds
+     est<-est*thisiv.sd
+     se<-se*thisiv.sd
+     #now compute
+     est.min<-est-1.96*se
+     est.max<-est+1.96*se
+     #return if was in the model
+     if(sum(thisrow)>0) {
+       returnrow<-data.frame(
+         iv=thisiv,
+         term,
+         mu=est,
+         mu.min=est.min,
+         mu.max=est.max,
+         se,
+         pval,
+         t,
+         stringsAsFactors=F
+       )
+     } else {
+       returnrow<-data.frame(
+         iv=thisiv,
+         mu=NA
+       )
+     }
+     #return
+     return(returnrow)
+   }) %>% rbind.fill
+   #identify
+   shortrundf$type<-"shortrun"
+   #chuck all nas
+   shortrundf<-shortrundf[!is.na(shortrundf$mu),]
+   
+   ##########################################
+   
+   #LONG-RUN
+   print("LR")
+   longrunvars<-c(beovars,controls)
+   lroutput<-lapply(seq_along(longrunvars),function(j) {
+     #j<-2
+     #print(j)
+     #get params
+     thisiv<-longrunvars[j]
+     # #get sd
+     #thisiv.sd<-1
+     thisiv.sd<-sdsdf$sd[sdsdf$var==thisiv]
+     if(length(thisiv.sd)!=1) stop("SD missing.")
+     #did we detect var?
+     thevar.regex<-paste0(
+       "^(L([0-9]+)?\\.)?(D([0-9]+)?\\.)?",
+       thisiv,
+       "$"
+     )
+     tmprows<-str_detect(names(coefs),thisiv)
+     if(sum(tmprows)==0) {
+       returnrow<-data.frame(
+         mu=NA
+       )
+     } else {
+       #all ests involving this var
+       iv.terms<-names(coefs)[tmprows]
+       #all ests with dv
+       tmprows<-str_detect(names(coefs),thisdv)
+       lagdv.terms<-names(coefs)[tmprows]
+       #get the longrun estimate
+       means<-c(
+         coefs[lagdv.terms],
+         coefs[iv.terms]
+       )
+       #get the vcov matrix
+       new.vcov<-thisvcov #from above
+       rows<-row.names(new.vcov)%in%c(lagdv.terms,iv.terms)
+       cols<-colnames(new.vcov)%in%c(lagdv.terms,iv.terms)
+       vcov.useme<-new.vcov[rows,cols]
+       #vcov needs to be ordered in the same way as the means
+       new.order<-match(names(means),row.names(vcov.useme))
+       vcov.useme<-vcov.useme[new.order,new.order]
+       #sample from the multivariate distribution defined here
+       draws<-mvrnorm(n=reps,mu=means,Sigma=vcov.useme)
+       numerator<-apply(draws[,iv.terms] %>% as.matrix,1,sum) #sum iv terms
+       denominator<- 1 - apply(draws[,lagdv.terms] %>% as.matrix,1,sum) #sum dv terms
+       lrm.distribution<-numerator/denominator
+       #put this distribution in meaningful units
+       lrm.distribution<-lrm.distribution * thisiv.sd
+       returnrow<-summarize.distribution2(lrm.distribution)
+       #return this info (but not w/ lag)
+       returnrow$iv<-thisiv
+     }
+     return(returnrow)
+   })
+   longrundf<-rbind.fill(lroutput)
+   #identify as longrun
+   longrundf$type<-"longrun"
+   #get rid of this when not applicable
+   longrundf<-longrundf[!is.na(longrundf$mu),]
+   
+   ##########################################
+   
+   #finalize
+   thism.estsdf<-rbind.fill(shortrundf,longrundf)
+   thism.estsdf$mname<-thismname
+   thism.estsdf$seq<-i
+   return(thism.estsdf)
+ })
[1] "Getting results from model 1 of 27"
[1] "SR"
[1] "LR"
[1] "Getting results from model 2 of 27"
[1] "SR"
[1] "LR"
[1] "Getting results from model 3 of 27"
[1] "SR"
[1] "LR"
[1] "Getting results from model 4 of 27"
[1] "SR"
[1] "LR"
[1] "Getting results from model 5 of 27"
[1] "SR"
[1] "LR"
[1] "Getting results from model 6 of 27"
[1] "SR"
[1] "LR"
[1] "Getting results from model 7 of 27"
[1] "SR"
[1] "LR"
[1] "Getting results from model 8 of 27"
[1] "SR"
[1] "LR"
[1] "Getting results from model 9 of 27"
[1] "SR"
[1] "LR"
[1] "Getting results from model 10 of 27"
[1] "SR"
[1] "LR"
[1] "Getting results from model 11 of 27"
[1] "SR"
[1] "LR"
[1] "Getting results from model 12 of 27"
[1] "SR"
[1] "LR"
[1] "Getting results from model 13 of 27"
[1] "SR"
[1] "LR"
[1] "Getting results from model 14 of 27"
[1] "SR"
[1] "LR"
[1] "Getting results from model 15 of 27"
[1] "SR"
[1] "LR"
[1] "Getting results from model 16 of 27"
[1] "SR"
[1] "LR"
[1] "Getting results from model 17 of 27"
[1] "SR"
[1] "LR"
[1] "Getting results from model 18 of 27"
[1] "SR"
[1] "LR"
[1] "Getting results from model 19 of 27"
[1] "SR"
[1] "LR"
[1] "Getting results from model 20 of 27"
[1] "SR"
[1] "LR"
[1] "Getting results from model 21 of 27"
[1] "SR"
[1] "LR"
[1] "Getting results from model 22 of 27"
[1] "SR"
[1] "LR"
[1] "Getting results from model 23 of 27"
[1] "SR"
[1] "LR"
[1] "Getting results from model 24 of 27"
[1] "SR"
[1] "LR"
[1] "Getting results from model 25 of 27"
[1] "SR"
[1] "LR"
[1] "Getting results from model 26 of 27"
[1] "SR"
[1] "LR"
[1] "Getting results from model 27 of 27"
[1] "SR"
[1] "LR"

> #put together in df
> estsdf<-rbind.fill(tmpoutput)

> estsdf
                   iv                 term            mu        mu.min        mu.max
1         incrt_t_jur        L.incrt_t_jur  9.671794e-01  9.533228e-01  9.810360e-01
2          beopct_all         L.beopct_all  3.757686e-01 -1.005734e+00  1.757271e+00
3          beopct_all                 <NA>  1.208669e+01 -3.790467e+01  5.631839e+01
4         imprt_t_jur        L.imprt_t_jur  9.948240e-01  9.784361e-01  1.011212e+00
5          beopct_all         L.beopct_all  5.518662e-01 -1.530262e+00  2.633994e+00
6          beopct_all                 <NA>  8.337682e+01 -7.152888e+02  1.210310e+03
7       officers_pcap      L.officers_pcap  8.159797e-01  7.428664e-01  8.890930e-01
8          beopct_all         L.beopct_all  1.132180e+00  1.395978e-01  2.124762e+00
9          beopct_all                 <NA>  6.197446e+00  7.769425e-01  1.073464e+01
10        incrt_t_jur        L.incrt_t_jur  1.115691e+00  9.967553e-01  1.234627e+00
11        incrt_t_jur       L2.incrt_t_jur -1.540332e-01 -2.711433e-01 -3.692302e-02
12         beopct_all         L.beopct_all  4.696606e-01 -7.736868e-01  1.713008e+00
13         beopct_all                 <NA>  1.166407e+01 -2.055372e+01  4.366796e+01
14        imprt_t_jur        L.imprt_t_jur  1.243437e+00  1.153120e+00  1.333753e+00
15        imprt_t_jur       L2.imprt_t_jur -2.609646e-01 -3.588716e-01 -1.630577e-01
16         beopct_all         L.beopct_all  8.310681e-01 -8.681687e-01  2.530305e+00
17         beopct_all                 <NA>  4.962808e+01 -9.701402e+01  2.780111e+02
18      officers_pcap      L.officers_pcap  5.921860e-01  4.773087e-01  7.070633e-01
19      officers_pcap     L2.officers_pcap  2.674758e-01  1.846874e-01  3.502643e-01
20         beopct_all         L.beopct_all  8.254502e-01 -9.271080e-03  1.660172e+00
21         beopct_all                 <NA>  5.940662e+00  3.734534e-02  1.120543e+01
22        incrt_t_jur        L.incrt_t_jur  1.100804e+00  9.868252e-01  1.214783e+00
23        incrt_t_jur       L2.incrt_t_jur -8.044045e-02 -2.235982e-01  6.271730e-02
24        incrt_t_jur       L3.incrt_t_jur -4.103777e-03 -9.395690e-02  8.574935e-02
25        incrt_t_jur       L4.incrt_t_jur -6.109976e-02 -1.145766e-01 -7.622942e-03
26         beopct_all         L.beopct_all  5.933592e-01 -5.783581e-01  1.765077e+00
27         beopct_all                 <NA>  1.383761e+01 -1.154635e+01  4.096304e+01
28        imprt_t_jur        L.imprt_t_jur  1.242212e+00  1.123505e+00  1.360918e+00
29        imprt_t_jur       L2.imprt_t_jur -2.815293e-01 -5.165615e-01 -4.649714e-02
30        imprt_t_jur       L3.imprt_t_jur  1.083117e-01 -6.207956e-03  2.228313e-01
31        imprt_t_jur       L4.imprt_t_jur -9.202066e-02 -1.618330e-01 -2.220832e-02
32         beopct_all         L.beopct_all  9.742309e-01 -5.958731e-01  2.544335e+00
33         beopct_all                 <NA>  4.423492e+01 -3.085311e+01  1.422427e+02
34      officers_pcap      L.officers_pcap  5.784559e-01  4.393734e-01  7.175384e-01
35      officers_pcap     L2.officers_pcap  2.344908e-01  1.913868e-01  2.775949e-01
36      officers_pcap     L3.officers_pcap  1.157427e-01 -1.509706e-02  2.465825e-01
37      officers_pcap     L4.officers_pcap -6.744826e-02 -1.265258e-01 -8.370750e-03
38         beopct_all         L.beopct_all  8.066973e-01  7.220924e-03  1.606174e+00
39         beopct_all                 <NA>  5.633741e+00 -4.119109e-02  1.113005e+01
40        incrt_t_jur        L.incrt_t_jur  9.630046e-01  9.480034e-01  9.780058e-01
41         beopct_all         L.beopct_all  4.528512e-01 -9.879192e-01  1.893622e+00
42       pct_blackpop       L.pct_blackpop -4.346863e-01 -2.154776e+00  1.285403e+00
43        violent_crt        L.violent_crt  2.396950e+00  8.893616e-01  3.904538e+00
44 all_allraces_loess L.all_allraces_loess  6.784035e+00 -2.191901e+01  3.548708e+01
45 demcontrol.klarner L.demcontrol.klarner  2.891956e-01 -6.524769e-01  1.230868e+00
46           txtot_pc           L.txtot_pc  1.495001e+00  1.019804e+00  1.970199e+00
47          unemprate          L.unemprate  2.815736e-01 -8.799298e-01  1.443077e+00
48       lnrealinc_pc       L.lnrealinc_pc -8.692672e-01 -4.316744e+00  2.578209e+00
49         growthrate         L.growthrate -2.325546e-01 -8.647419e-01  3.996327e-01
50               gini               L.gini -1.253445e+00 -3.182957e+00  6.760674e-01
51         beopct_all                 <NA>  1.265670e+01 -3.666635e+01  5.240148e+01
52       pct_blackpop                 <NA> -1.306152e+01 -5.779114e+01  4.077189e+01
53        violent_crt                 <NA>  6.530605e+01  2.697024e+01  1.165738e+02
54 all_allraces_loess                 <NA>  1.710986e+02 -5.669185e+02  1.117581e+03
55 demcontrol.klarner                 <NA>  7.400704e+00 -1.592128e+01  4.310221e+01
56           txtot_pc                 <NA>  4.035520e+01  2.308806e+01  7.236152e+01
57          unemprate                 <NA>  8.894689e+00 -2.702518e+01  4.702998e+01
58       lnrealinc_pc                 <NA> -2.476204e+01 -1.194883e+02  7.960330e+01
59         growthrate                 <NA> -6.354740e+00 -2.710795e+01  9.888330e+00
60               gini                 <NA> -3.379879e+01 -1.104197e+02  1.779803e+01
61        imprt_t_jur        L.imprt_t_jur  9.826461e-01  9.659666e-01  9.993256e-01
62         beopct_all         L.beopct_all  7.280296e-01 -1.417914e+00  2.873974e+00
63       pct_blackpop       L.pct_blackpop  1.377380e-01 -1.924775e+00  2.200251e+00
64        violent_crt        L.violent_crt  4.464276e+00  2.011231e+00  6.917320e+00
65 all_allraces_loess L.all_allraces_loess  3.233008e+01 -7.155149e+00  7.181531e+01
66 demcontrol.klarner L.demcontrol.klarner -1.873032e-02 -1.323852e+00  1.286391e+00
67           txtot_pc           L.txtot_pc  2.215555e+00 -1.817257e+00  6.248367e+00
68          unemprate          L.unemprate -1.014281e+00 -2.881149e+00  8.525861e-01
69       lnrealinc_pc       L.lnrealinc_pc -5.785903e-01 -7.179226e+00  6.022046e+00
70         growthrate         L.growthrate -8.120469e-01 -1.861582e+00  2.374879e-01
71               gini               L.gini -3.008640e+00 -7.271733e+00  1.254454e+00
72         beopct_all                 <NA>  4.458937e+01 -2.106164e+02  2.128279e+02
73       pct_blackpop                 <NA> -2.718722e-01 -2.223083e+02  3.433065e+02
74        violent_crt                 <NA>  2.488756e+02  9.799972e+01  1.031370e+03
75 all_allraces_loess                 <NA>  1.786829e+03 -6.806119e+02  1.602768e+04
76 demcontrol.klarner                 <NA>  4.243683e-01 -1.803450e+02  2.134061e+02
77           txtot_pc                 <NA>  1.264765e+02 -2.152008e+02  1.182600e+03
78          unemprate                 <NA> -5.844517e+01 -5.099310e+02  6.645053e+01
79       lnrealinc_pc                 <NA> -4.034447e+01 -5.921704e+02  1.083189e+03
80         growthrate                 <NA> -4.591741e+01 -3.260876e+02  2.575337e+01
81               gini                 <NA> -1.529482e+02 -1.635071e+03  9.915275e+01
82      officers_pcap      L.officers_pcap  7.837922e-01  6.998437e-01  8.677407e-01
83         beopct_all         L.beopct_all  1.174011e+00  1.889757e-01  2.159047e+00
             se          pval            t     type    pval.class
1   0.007069678  0.000000e+00 136.80670592 shortrun          <NA>
2   0.704848414  5.940198e-01   0.53311972 shortrun          <NA>
3            NA            NA           NA  longrun       not sig
4   0.008361132  0.000000e+00 118.98197133 shortrun          <NA>
5   1.062310177  6.034954e-01   0.51949628 shortrun          <NA>
6            NA            NA           NA  longrun       not sig
7   0.037302696  1.587976e-93  21.87455105 shortrun          <NA>
8   0.506419512  2.550535e-02   2.23565642 shortrun          <NA>
9            NA            NA           NA  longrun at alpha=0.05
10  0.060681534  4.767311e-69  18.38600674 shortrun          <NA>
11  0.059750085  1.002145e-02  -2.57795755 shortrun          <NA>
12  0.634360923  4.591781e-01   0.74036807 shortrun          <NA>
13           NA            NA           NA  longrun       not sig
14  0.046079701 7.112104e-130  26.98447474 shortrun          <NA>
15  0.049952524  2.008315e-07  -5.22425348 shortrun          <NA>
16  0.866957565  3.379215e-01   0.95860298 shortrun          <NA>
17           NA            NA           NA  longrun       not sig
18  0.058610880  2.439063e-23  10.10368699 shortrun          <NA>
19  0.042238996  3.091298e-10   6.33243810 shortrun          <NA>
20  0.425878221  5.276273e-02   1.93823068 shortrun          <NA>
21           NA            NA           NA  longrun at alpha=0.05
22  0.058152478  1.032966e-72  18.92961483 shortrun          <NA>
23  0.073039669  2.709098e-01  -1.10132553 shortrun          <NA>
24  0.045843432  9.286813e-01  -0.08951723 shortrun          <NA>
25  0.027284091  2.525842e-02  -2.23939147 shortrun          <NA>
26  0.597814988  3.210712e-01   0.99254661 shortrun          <NA>
27           NA            NA           NA  longrun       not sig
28  0.060564496  4.181422e-82  20.51056240 shortrun          <NA>
29  0.119914384  1.902349e-02  -2.34775284 shortrun          <NA>
30  0.058428384  6.398176e-02   1.85375102 shortrun          <NA>
31  0.035618540  9.879255e-03  -2.58350447 shortrun          <NA>
32  0.801073445  2.241271e-01   1.21615674 shortrun          <NA>
33           NA            NA           NA  longrun       not sig
34  0.070960452  6.947005e-16   8.15180644 shortrun          <NA>
35  0.021991864  9.966124e-26  10.66261729 shortrun          <NA>
36  0.066754993  8.312983e-02   1.73384367 shortrun          <NA>
37  0.030141586  2.537071e-02  -2.23771431 shortrun          <NA>
38  0.407896106  4.812592e-02   1.97770285 shortrun          <NA>
39           NA            NA           NA  longrun at alpha=0.10
40  0.007653689  0.000000e+00 125.82228093 shortrun          <NA>
41  0.735086924  5.379429e-01   0.61605121 shortrun          <NA>
42  0.877596774  6.204420e-01  -0.49531438 shortrun          <NA>
43  0.769177694  1.862195e-03   3.11624989 shortrun          <NA>
44 14.644412663  6.432438e-01   0.46325073 shortrun          <NA>
45  0.480445155  5.472989e-01   0.60193268 shortrun          <NA>
46  0.242447716  8.708916e-10   6.16628290 shortrun          <NA>
47  0.592603782  6.347434e-01   0.47514646 shortrun          <NA>
48  1.758916599  6.212243e-01  -0.49420603 shortrun          <NA>
49  0.322544538  4.710084e-01  -0.72099990 shortrun          <NA>
50  0.984444999  2.031028e-01  -1.27325016 shortrun          <NA>
51           NA            NA           NA  longrun       not sig
52           NA            NA           NA  longrun       not sig
53           NA            NA           NA  longrun at alpha=0.01
54           NA            NA           NA  longrun       not sig
55           NA            NA           NA  longrun       not sig
56           NA            NA           NA  longrun at alpha=0.01
57           NA            NA           NA  longrun       not sig
58           NA            NA           NA  longrun       not sig
59           NA            NA           NA  longrun       not sig
60           NA            NA           NA  longrun       not sig
61  0.008509953  0.000000e+00 115.47020438 shortrun          <NA>
62  1.094869450  5.061930e-01   0.66494653 shortrun          <NA>
63  1.052302791  8.958794e-01   0.13089197 shortrun          <NA>
64  1.251553410  3.731407e-04   3.56698781 shortrun          <NA>
65 20.145525400  1.087549e-01   1.60482686 shortrun          <NA>
66  0.665878408  9.775634e-01  -0.02812874 shortrun          <NA>
67  2.057557324  2.817580e-01   1.07678899 shortrun          <NA>
68  0.952483424  2.871114e-01  -1.06488090 shortrun          <NA>
69  3.367671490  8.636136e-01  -0.17180722 shortrun          <NA>
70  0.535476961  1.296180e-01  -1.51649274 shortrun          <NA>
71  2.175047676  1.668056e-01  -1.38325232 shortrun          <NA>
72           NA            NA           NA  longrun       not sig
73           NA            NA           NA  longrun       not sig
74           NA            NA           NA  longrun at alpha=0.05
75           NA            NA           NA  longrun       not sig
76           NA            NA           NA  longrun       not sig
77           NA            NA           NA  longrun       not sig
78           NA            NA           NA  longrun       not sig
79           NA            NA           NA  longrun       not sig
80           NA            NA           NA  longrun       not sig
81           NA            NA           NA  longrun       not sig
82  0.042830854  2.545880e-68  18.29970940 shortrun          <NA>
83  0.502569235  1.960810e-02   2.33601924 shortrun          <NA>
                               mname seq
1     incrt_t_jur.onelagdv.bivariate   1
2     incrt_t_jur.onelagdv.bivariate   1
3     incrt_t_jur.onelagdv.bivariate   1
4     imprt_t_jur.onelagdv.bivariate   2
5     imprt_t_jur.onelagdv.bivariate   2
6     imprt_t_jur.onelagdv.bivariate   2
7   officers_pcap.onelagdv.bivariate   3
8   officers_pcap.onelagdv.bivariate   3
9   officers_pcap.onelagdv.bivariate   3
10    incrt_t_jur.twolagdv.bivariate   4
11    incrt_t_jur.twolagdv.bivariate   4
12    incrt_t_jur.twolagdv.bivariate   4
13    incrt_t_jur.twolagdv.bivariate   4
14    imprt_t_jur.twolagdv.bivariate   5
15    imprt_t_jur.twolagdv.bivariate   5
16    imprt_t_jur.twolagdv.bivariate   5
17    imprt_t_jur.twolagdv.bivariate   5
18  officers_pcap.twolagdv.bivariate   6
19  officers_pcap.twolagdv.bivariate   6
20  officers_pcap.twolagdv.bivariate   6
21  officers_pcap.twolagdv.bivariate   6
22   incrt_t_jur.fourlagdv.bivariate   7
23   incrt_t_jur.fourlagdv.bivariate   7
24   incrt_t_jur.fourlagdv.bivariate   7
25   incrt_t_jur.fourlagdv.bivariate   7
26   incrt_t_jur.fourlagdv.bivariate   7
27   incrt_t_jur.fourlagdv.bivariate   7
28   imprt_t_jur.fourlagdv.bivariate   8
29   imprt_t_jur.fourlagdv.bivariate   8
30   imprt_t_jur.fourlagdv.bivariate   8
31   imprt_t_jur.fourlagdv.bivariate   8
32   imprt_t_jur.fourlagdv.bivariate   8
33   imprt_t_jur.fourlagdv.bivariate   8
34 officers_pcap.fourlagdv.bivariate   9
35 officers_pcap.fourlagdv.bivariate   9
36 officers_pcap.fourlagdv.bivariate   9
37 officers_pcap.fourlagdv.bivariate   9
38 officers_pcap.fourlagdv.bivariate   9
39 officers_pcap.fourlagdv.bivariate   9
40    incrt_t_jur.onelagdv.onelagivs  10
41    incrt_t_jur.onelagdv.onelagivs  10
42    incrt_t_jur.onelagdv.onelagivs  10
43    incrt_t_jur.onelagdv.onelagivs  10
44    incrt_t_jur.onelagdv.onelagivs  10
45    incrt_t_jur.onelagdv.onelagivs  10
46    incrt_t_jur.onelagdv.onelagivs  10
47    incrt_t_jur.onelagdv.onelagivs  10
48    incrt_t_jur.onelagdv.onelagivs  10
49    incrt_t_jur.onelagdv.onelagivs  10
50    incrt_t_jur.onelagdv.onelagivs  10
51    incrt_t_jur.onelagdv.onelagivs  10
52    incrt_t_jur.onelagdv.onelagivs  10
53    incrt_t_jur.onelagdv.onelagivs  10
54    incrt_t_jur.onelagdv.onelagivs  10
55    incrt_t_jur.onelagdv.onelagivs  10
56    incrt_t_jur.onelagdv.onelagivs  10
57    incrt_t_jur.onelagdv.onelagivs  10
58    incrt_t_jur.onelagdv.onelagivs  10
59    incrt_t_jur.onelagdv.onelagivs  10
60    incrt_t_jur.onelagdv.onelagivs  10
61    imprt_t_jur.onelagdv.onelagivs  11
62    imprt_t_jur.onelagdv.onelagivs  11
63    imprt_t_jur.onelagdv.onelagivs  11
64    imprt_t_jur.onelagdv.onelagivs  11
65    imprt_t_jur.onelagdv.onelagivs  11
66    imprt_t_jur.onelagdv.onelagivs  11
67    imprt_t_jur.onelagdv.onelagivs  11
68    imprt_t_jur.onelagdv.onelagivs  11
69    imprt_t_jur.onelagdv.onelagivs  11
70    imprt_t_jur.onelagdv.onelagivs  11
71    imprt_t_jur.onelagdv.onelagivs  11
72    imprt_t_jur.onelagdv.onelagivs  11
73    imprt_t_jur.onelagdv.onelagivs  11
74    imprt_t_jur.onelagdv.onelagivs  11
75    imprt_t_jur.onelagdv.onelagivs  11
76    imprt_t_jur.onelagdv.onelagivs  11
77    imprt_t_jur.onelagdv.onelagivs  11
78    imprt_t_jur.onelagdv.onelagivs  11
79    imprt_t_jur.onelagdv.onelagivs  11
80    imprt_t_jur.onelagdv.onelagivs  11
81    imprt_t_jur.onelagdv.onelagivs  11
82  officers_pcap.onelagdv.onelagivs  12
83  officers_pcap.onelagdv.onelagivs  12
 [ reached 'max' / getOption("max.print") -- omitted 439 rows ]

> #classify the shortrun pvals into pval class
> #(in the printout above the longrun rows already have a
> #pval.class and the shortrun rows are still NA)
> tmp<-estsdf$type=="shortrun"

> estsdf$pval.class[estsdf$pval<0.01 & tmp]<-"at alpha=0.01"

> estsdf$pval.class[estsdf$pval>=0.01 & estsdf$pval<0.05 & tmp]<-"at alpha=0.05"

> estsdf$pval.class[estsdf$pval>=0.05 & estsdf$pval<0.10 & tmp]<-"at alpha=0.10"

> estsdf$pval.class[estsdf$pval>=0.10 & tmp]<-"not sig"

> #every row should have a class by now; if not, fail with a
> #message that says how many rows are affected
> #(e.g. a shortrun estimate with a missing pval)
> tmp<-is.na(estsdf$pval.class)

> if(any(tmp))
+   stop(
+     sum(tmp)," row(s) of estsdf have no pval.class; ",
+     "check for estimates with a missing pval"
+   )

> #########################################################
> #########################################################
> 
> #GET FIT
> 
> this.sequence<-seq_along(mods)

> fitdf<-lapply(this.sequence,function(i) {
+   #i<-10
+   #report progress, then return the calcfits() result for model i;
+   #the per-model results are stacked by rbind.fill
+   n.mods<-length(this.sequence)
+   print(paste("Calc fit for model",i,"of",n.mods))
+   calcfits(mods[[i]])
+ }) %>% rbind.fill
[1] "Calc fit for model 1 of 27"
[1] "Calc fit for model 2 of 27"
[1] "Calc fit for model 3 of 27"
[1] "Calc fit for model 4 of 27"
[1] "Calc fit for model 5 of 27"
[1] "Calc fit for model 6 of 27"
[1] "Calc fit for model 7 of 27"
[1] "Calc fit for model 8 of 27"
[1] "Calc fit for model 9 of 27"
[1] "Calc fit for model 10 of 27"
[1] "Calc fit for model 11 of 27"
[1] "Calc fit for model 12 of 27"
[1] "Calc fit for model 13 of 27"
[1] "Calc fit for model 14 of 27"
[1] "Calc fit for model 15 of 27"
[1] "Calc fit for model 16 of 27"
[1] "Calc fit for model 17 of 27"
[1] "Calc fit for model 18 of 27"
[1] "Calc fit for model 19 of 27"
[1] "Calc fit for model 20 of 27"
[1] "Calc fit for model 21 of 27"
[1] "Calc fit for model 22 of 27"
[1] "Calc fit for model 23 of 27"
[1] "Calc fit for model 24 of 27"
[1] "Calc fit for model 25 of 27"
[1] "Calc fit for model 26 of 27"
[1] "Calc fit for model 27 of 27"

> fitdf$mname<-modsdf$mname

> #########################################################
> #########################################################
> 
> #PUT TOGETHER
> 
> #put model info (from specs)
> #together with estimates
> #and fitdf
> 
> mergelist<-list(modsdf,estsdf,fitdf)

> #fold the list left to right, keeping unmatched rows
> #from either side at each step (all=TRUE)
> finaldf<-Reduce(
+   function(left,right) {
+     merge(left,right,by="mname",all=TRUE)
+   },
+   mergelist
+ )

> #add sample info
> #(default merge: only rows whose dv appears in both are kept)
> finaldf<-merge(x=finaldf,y=sampsdf,by="dv")

> head(finaldf)
           dv                           mname     dvlag    ivspec          iv           term
1 imprt_t_jur imprt_t_jur.fourlagdv.bivariate fourlagdv bivariate imprt_t_jur  L.imprt_t_jur
2 imprt_t_jur imprt_t_jur.fourlagdv.bivariate fourlagdv bivariate imprt_t_jur L2.imprt_t_jur
3 imprt_t_jur imprt_t_jur.fourlagdv.bivariate fourlagdv bivariate imprt_t_jur L3.imprt_t_jur
4 imprt_t_jur imprt_t_jur.fourlagdv.bivariate fourlagdv bivariate imprt_t_jur L4.imprt_t_jur
5 imprt_t_jur imprt_t_jur.fourlagdv.bivariate fourlagdv bivariate  beopct_all   L.beopct_all
6 imprt_t_jur imprt_t_jur.fourlagdv.bivariate fourlagdv bivariate  beopct_all           <NA>
           mu        mu.min       mu.max         se         pval         t     type
1  1.24221187   1.123505459   1.36091828 0.06056450 4.181422e-82 20.510562 shortrun
2 -0.28152934  -0.516561527  -0.04649714 0.11991438 1.902349e-02 -2.347753 shortrun
3  0.10831168  -0.006207956   0.22283131 0.05842838 6.398176e-02  1.853751 shortrun
4 -0.09202066  -0.161832994  -0.02220832 0.03561854 9.879255e-03 -2.583504 shortrun
5  0.97423087  -0.595873084   2.54433482 0.80107345 2.241271e-01  1.216157 shortrun
6 44.23492370 -30.853105141 142.24274157         NA           NA        NA  longrun
     pval.class seq      bic      aic        r2     adjr2    sampname    N N.states     range
1 at alpha=0.01   8 13622.24 13181.02 0.9937271 0.9933698 imprt_t_jur 1504       43 1974-2008
2 at alpha=0.05   8 13622.24 13181.02 0.9937271 0.9933698 imprt_t_jur 1504       43 1974-2008
3 at alpha=0.10   8 13622.24 13181.02 0.9937271 0.9933698 imprt_t_jur 1504       43 1974-2008
4 at alpha=0.01   8 13622.24 13181.02 0.9937271 0.9933698 imprt_t_jur 1504       43 1974-2008
5       not sig   8 13622.24 13181.02 0.9937271 0.9933698 imprt_t_jur 1504       43 1974-2008
6       not sig   8 13622.24 13181.02 0.9937271 0.9933698 imprt_t_jur 1504       43 1974-2008

> #########################################################
> #########################################################
> 
> #IDENTIFY PREFESTS
> 
> finaldf<-by(finaldf,finaldf$dv,function(df) {
+   #df<-finaldf[finaldf$dv=="incrt_t_jur",]
+   #one row of fit statistics per model of this dv
+   fitvars<-c("mname","aic","bic","r2","adjr2")
+   fits<-unique(df[,fitvars])
+   n.fits<-nrow(fits)
+   #aic/bic: rank 1 is the smallest value
+   fits$aic.rank<-rank(fits$aic)
+   fits$bic.rank<-rank(fits$bic)
+   #r2/adjr2: ranking is flipped, so rank 1 is the largest value
+   fits$r2.rank<-n.fits + 1 - rank(fits$r2)
+   fits$adjr2.rank<-n.fits + 1 - rank(fits$adjr2)
+   #flag the rank-1 model by AIC (pref) and by BIC (prefbic)
+   #NOTE: rank() averages ties, so a tie for first place
+   #would leave no model flagged
+   fits$pref<-fits$aic.rank==1
+   fits$prefbic<-fits$bic.rank==1
+   #copy the two flags onto every row of the matching model
+   merge(
+     df,
+     fits[,c("mname","pref","prefbic")],
+     by="mname"
+   )
+ }) %>% rbind.fill

> #########################################################
> #########################################################
> 
> #current mu's are semi-standardized
> #add standardized estimates for display:
> #each estimate and its bounds are divided by the sd of its dv
> tmp<-sdsdf$var%in%dvs

> dvsd_df<-setNames(
+   sdsdf[tmp,c("var","sd")],
+   c("dv","dvsd")
+ )

> #no `by` is given, so merge() joins on the shared column names
> finaldf<-merge(
+   finaldf,
+   dvsd_df
+ )

> finaldf$musd<-with(finaldf,mu/dvsd)

> finaldf$musd.min<-with(finaldf,mu.min/dvsd)

> finaldf$musd.max<-with(finaldf,mu.max/dvsd)

> #########################################################
> #########################################################
> 
> #save out
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> write.csv(
+   finaldf,
+   "03_dind_regresultsdf.csv",
+   row.names=F
+ )

> #save forms and other output
> saveRDS(
+   list(
+     forms=forms,
+     mods=mods,
+     samps=samps,
+     sdsdf=sdsdf
+   )
+   ,
+   "03_dind_regstuff.RDS"
+ )
[1] "######"
[1] "Running:"
[1] "03_regmods_robustness.R"

> #########################################################
> #########################################################
> 
> #clear workspace
> rm(list=ls())

> #load packages
> require(stringr)

> require(plyr)

> require(dplyr)

> require(zoo)

> require(data.table)

> require(tidyr)

> require(rprojroot)

> #extras
> require(haven)

> require(readr)

> require(plm)

> #set dirs
> rootdir<-find_root(
+   criterion=has_file('_rapol.Rproj')
+ )

> codedir<-file.path(rootdir,"code")

> setwd(codedir); dir()
[1] "01_publicopinion" "02_voting"        "03_dind"          "dirs.R"          
[5] "runeverything.R" 

> source('dirs.R')

> #load helper functions
> setwd(dcodedir); dir()
 [1] "01_dindmods.R"           "02_regmods.R"            "03_regmods_robustness.R"
 [4] "04_summarize.R"          "beofunctions.R"          "beofunctions2.R"        
 [7] "checkunitroots.R"        "dirs.R"                  "getestimates.R"         
[10] "getlongrun2.R"           "XX_figures.R"            "XX_footnotes.R"         
[13] "XX_intmods.R"            "XX_runall.R"             "XX_tables.R"            
[16] "XX_unitroots.R"          "XX_unitroots_output.R"   "XX_unitroots2.R"        
[19] "XX_writeplots.R"        

> source('beofunctions.R')

> source('beofunctions2.R')

> source('getlongrun2.R')

> #set seed
> set.seed(23)

> reps<-1000

> #########################################################
> #########################################################
> 
> #load regmods image
> # setwd(filesdir); dir()
> # # load(
> # #   'regmods.RData'
> # # )
> 
> #load the original beodf
> setwd(datadir); dir()
 [1] "abcdfs.csv"                                 "All_Bills(1947-2017).csv"                  
 [3] "american_violence_data_20241016_201046.csv" "anesdf.csv"                                
 [5] "beodf.csv"                                  "beodf5.csv"                                
 [7] "billsdf_revised.csv"                        "cbcdf.csv"                                 
 [9] "cbsdfs.csv"                                 "citecount.csv"                             
[11] "congressmen_list.csv"                       "conventional.bib"                          
[13] "cpopdf.csv"                                 "Ethnic_Collect_Action.dta"                 
[15] "fulldf_bills.csv"                           "gallupdfs.csv"                             
[17] "gssdf.csv"                                  "Hall_votes.csv"                            
[19] "histpundf_national_220328.csv"              "housevotes_handcoded_revised.csv"          
[21] "HSall_members.csv"                          "HSall_rollcalls_withissues.csv"            
[23] "incrates_subnationalstate.csv"              "lbj1964.csv"                               
[25] "nbclatdfs.csv"                              "prezdf.csv"                                
[27] "questions_fortex.txt"                       "race_riot.dta"                             
[29] "redf.csv"                                   "roperdfs.csv"                              
[31] "tab_longviolence.csv"                       "timedfs.csv"                               
[33] "votes"                                     

> beodfraw<-read.csv(
+   'beodf.csv',
+   stringsAsFactors=F
+ )

> #add third and fourth lags of the differenced
> #imprt_t_jur and officers_pcap
> #this is necessary b/c unitroot tests flag it
> beodfraw<-by(beodfraw,beodfraw$state_alpha2,function(df) {
+   #df<-beodfraw[beodfraw$state_alpha2=='AL',]
+   #within one state: L3 is the lag of L2, and L4 the lag of the new L3
+   #(assumes rows are in year order within each state; TODO confirm)
+   for(v in c("imprt_t_jur","officers_pcap")) {
+     l2<-paste0("L2.D.",v)
+     l3<-paste0("L3.D.",v)
+     l4<-paste0("L4.D.",v)
+     df[[l3]]<-dplyr::lag(df[[l2]])
+     df[[l4]]<-dplyr::lag(df[[l3]])
+   }
+   df
+ }) %>% rbind.fill

> #we also want the five-year data
> setwd(datadir); dir()
 [1] "abcdfs.csv"                                 "All_Bills(1947-2017).csv"                  
 [3] "american_violence_data_20241016_201046.csv" "anesdf.csv"                                
 [5] "beodf.csv"                                  "beodf5.csv"                                
 [7] "billsdf_revised.csv"                        "cbcdf.csv"                                 
 [9] "cbsdfs.csv"                                 "citecount.csv"                             
[11] "congressmen_list.csv"                       "conventional.bib"                          
[13] "cpopdf.csv"                                 "Ethnic_Collect_Action.dta"                 
[15] "fulldf_bills.csv"                           "gallupdfs.csv"                             
[17] "gssdf.csv"                                  "Hall_votes.csv"                            
[19] "histpundf_national_220328.csv"              "housevotes_handcoded_revised.csv"          
[21] "HSall_members.csv"                          "HSall_rollcalls_withissues.csv"            
[23] "incrates_subnationalstate.csv"              "lbj1964.csv"                               
[25] "nbclatdfs.csv"                              "prezdf.csv"                                
[27] "questions_fortex.txt"                       "race_riot.dta"                             
[29] "redf.csv"                                   "roperdfs.csv"                              
[31] "tab_longviolence.csv"                       "timedfs.csv"                               
[33] "votes"                                     

> beodf5<-read.csv(
+   'beodf5.csv',
+   stringsAsFactors=F
+ )

> #load the results
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> finaldf<-read.csv(
+   '03_dind_regresultsdf.csv',
+   stringsAsFactors=F
+ )

> #load the things that go along with it
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> reglist<-readRDS(
+   '03_dind_regstuff.RDS'
+ )

> forms<-reglist$forms

> mods<-reglist$mods

> samps<-reglist$samps

> sdsdf<-reglist$sdsdf

> #get unit root info
> setwd(dcodedir); dir()
 [1] "01_dindmods.R"           "02_regmods.R"            "03_regmods_robustness.R"
 [4] "04_summarize.R"          "beofunctions.R"          "beofunctions2.R"        
 [7] "checkunitroots.R"        "dirs.R"                  "getestimates.R"         
[10] "getlongrun2.R"           "XX_figures.R"            "XX_footnotes.R"         
[13] "XX_intmods.R"            "XX_runall.R"             "XX_tables.R"            
[16] "XX_unitroots.R"          "XX_unitroots_output.R"   "XX_unitroots2.R"        
[19] "XX_writeplots.R"        

> #########################################################
> #########################################################
> 
> #perform unit root tests
> 
> #set params
> 
> #vars
> #rows of the preferred models for the two dvs tested here
> tmp<-finaldf$dv%in%c('imprt_t_jur','officers_pcap') &
+   finaldf$pref

> prefmods<-unique(finaldf$mname[tmp])

> #every variable named in one of the preferred formulas
> vars<-unique(unlist(lapply(forms[prefmods],all.vars)))

> #omit some: year, and the four lags of each dv
> lagprefs<-c("L.","L2.","L3.","L4.")

> dv_vars<-as.vector(
+   outer(lagprefs,c("imprt_t_jur","officers_pcap"),paste0)
+ )

> badvars<-c("year",dv_vars)

> tmp<-!vars%in%badvars

> vars<-vars[tmp]

> #misc parameters
> maxlags<-3

> #BALANCED TEST
> 
> #the data are mostly balanced, 
> #so all we want is to loop through
> #and pick the 50 states
> 
> #where there is an issue, b/c one state is missing
> #or for some other reason, we adjust slightly
> #this gives us three tests per variable
> 
> #one row per variable to be tested
> loopdf<-expand.grid(
+   var=vars,
+   stringsAsFactors=FALSE
+ )

> #seq_len() rather than 1:nrow(), so that an empty loopdf
> #gives an empty index instead of c(1,0)
> loopdf$i<-seq_len(nrow(loopdf))

> #put together a balanced dataset for each variable
> #i will want at least N.countries (states, here), and so I pick the
> #T for each variable that gives me this many N.countries
> #in a balanced dataset
> 
> tmpseq.i<-seq_len(nrow(loopdf))

> baldf<-lapply(tmpseq.i,function(i) {
+   #for row i of loopdf: build a balanced state-by-time panel of that
+   #variable and run four panel unit-root tests on it
+   #i<-8
+   print(
+     paste(
+       i,"of",length(tmpseq.i)
+     )
+   )
+   
+   ####
+   
+   thisvar<-loopdf$var[i]
+   #count non-missing obs of thisvar per state, dropping states with none.
+   #if every state has the same count, that count is minN; otherwise minN
+   #is hard-coded for imprt_t_jur and L.violent_crt, and we stop for any
+   #other variable. only states with exactly minN obs are kept.
+   thisdf<-beodfraw
+   tmpa2s<-tapply(
+     thisdf[[thisvar]],
+     thisdf$state_alpha2,
+     function(x) sum(!is.na(x))
+   ) %>% sort
+   tmpa2s<-tmpa2s[tmpa2s!=0]
+   tmptab<-table(tmpa2s)
+   if(length(tmptab)!=1) {
+     if(thisvar=="imprt_t_jur") {
+       minN<-41
+     } else if(thisvar=="L.violent_crt") {
+       minN<-51
+     } else {
+       stop('inspect')
+     }
+   } else {
+     minN<-tmpa2s[1]
+   }
+   tmpa2s<-tmpa2s[tmpa2s==minN]
+   
+   
+   
+   ###
+   
+   #create a long testdf holding the last minN non-missing obs
+   #of thisvar for each kept state, indexed by time=1:minN
+   tmpvars<-c("state_alpha2","year",thisvar)
+   tmprows<-thisdf$state_alpha2%in%names(tmpa2s)
+   tmpdf<-thisdf[tmprows,tmpvars]
+   testdf<-by(tmpdf,tmpdf$state_alpha2,function(df) {
+     #df<-tmpdf[tmpdf$state_alpha2=="AL",]
+     df<-df[!is.na(df[[thisvar]]),]
+     data.frame(
+       thisvar=tail(df[[thisvar]],minN),
+       time=1:minN,
+       state_alpha2=unique(df$state_alpha2),
+       stringsAsFactors=F
+     )
+   }) %>% rbind.fill
+   
+   ###
+   #make this wide rather than long
+   #(first, three leftover checks for NA/NaN/Inf; their results are unused)
+   is.na(testdf$thisvar) %>% sum
+   is.nan(testdf$thisvar) %>% sum
+   sum(!is.finite(testdf$thisvar))
+   
+   #one column per state (plus the time column), one row per time point
+   testdf<-spread(
+     testdf,
+     state_alpha2,
+     thisvar
+   )
+   
+   #but remove ineligible cols (those which don't change)
+   badcols<-apply(
+     testdf,2,
+     function(x) length(unique(x))==1
+   )
+   testdf<-testdf[,!badcols]
+   
+   #and more badcols are those w/o sufficient variance
+   #purtest() throws an error if these are included
+   #(hard-coded: HI is dropped for L.demcontrol.klarner)
+   if(thisvar=="L.demcontrol.klarner") {
+     bada2s<-c("HI")
+   } else {
+     bada2s<-c("")
+   }
+   badcols<-names(testdf)%in%bada2s
+   testdf<-testdf[,!badcols]
+   #check variances (the sorted result is not used further)
+   tmpvariances<-apply(
+     testdf,2,
+     var
+   ) 
+   sort(tmpvariances)
+   
+   #run each test and code its p-value at the 0.05 level: for hadri,
+   #p>0.05 gives unitroot="No"; for the other tests, p<0.05 gives "No"
+   #run the tests, get results
+   tests<-c(
+     "levinlin",
+     "ips",
+     "madwu",
+     "hadri"
+   )
+   returndf<-lapply(tests,function(mytest) {
+     #mytest<-"levinlin"
+     #print(mytest)
+     testdf$time<-NULL
+     tmptest<-purtest(
+       testdf,
+       test=mytest,
+       exo='intercept',
+       lags='AIC',
+       pmax=maxlags
+     )
+     returndf<-data.frame(
+       #test=tmptest$statistic$method,
+       test=mytest,
+       pval=tmptest$statistic$p.value
+     )
+     if(mytest=="hadri") {
+       returndf$unitroot<-ifelse(
+         returndf$pval>0.05,"No","Yes"
+       )
+     } else {
+       returndf$unitroot<-ifelse(
+         returndf$pval<0.05,"No","Yes"
+       )
+     }
+     returndf
+   }) %>% rbind.fill
+   
+   #number of state columns tested; the -1 is for the time column,
+   #which is only dropped inside the inner function above
+   returndf$N.actual<-ncol(testdf) - 1
+   returndf$i<-i
+   returndf
+   
+ }) %>% rbind.fill
[1] "1 of 12"
[1] "2 of 12"
[1] "3 of 12"
[1] "4 of 12"
[1] "5 of 12"
[1] "6 of 12"
[1] "7 of 12"
[1] "8 of 12"
[1] "9 of 12"
[1] "10 of 12"
[1] "11 of 12"
[1] "12 of 12"

> #merge loopdf and returndf
> intersect(
+   names(loopdf),
+   names(baldf)
+ )
[1] "i"

> baldf<-merge(
+   x=loopdf,
+   y=baldf,
+   by="i",
+   all=TRUE
+ )

> #any potential unitroot vars?
> #leaving hadri aside, count per variable how many tests
> #are coded unitroot=="Yes"; two or more flags the variable
> tmp<-baldf$test!="hadri"

> tmptab<-tapply(
+   baldf$unitroot[tmp]=="Yes",
+   baldf$var[tmp],
+   sum
+ )

> urvars<-names(tmptab[tmptab>=2])

> ########################################################
> ########################################################
> 
> #strip a leading "L." so that urvars holds the bare variable names
> #(the pattern is anchored and the dot is escaped: "\\L." put the
> #backslash on the L and left the dot as a wildcard)
> urvars<-str_replace(
+   urvars,
+   "^L\\.",""
+ )

> #a flagged variable can appear as a lagged regressor (L., L2., ...)
> iv.urvars<-paste0("L([0-9]+)?\\.",urvars)

> #or unlagged at the very start of the formula string
> dv.urvars<-paste0("^",urvars)

> #one capture group with all alternatives, used by urtransform()
> ur.regex<-paste0("(",paste0(c(dv.urvars,iv.urvars),collapse="|"),")")

> #quick function to transform
> #a formula into a urformula
> urtransform<-function(form,asformula=TRUE) {
+   #form: a formula object, or a formula written out as one string
+   #asformula: if TRUE return a formula, otherwise return the string
+   #uses the global ur.regex to find the variables to difference
+   #form<-int.f
+   #formula=T
+   #if it's not a string, rebuild "lhs ~ rhs" from the formula
+   #(is.character() rather than class()!="character", which gives
+   #a length>1 condition for objects with more than one class)
+   if(!is.character(form)) {
+     form.string<-as.character(form)
+     form<-paste(form.string[2],form.string[1],form.string[3])
+   }
+   #put D. in front of every match of ur.regex
+   urform<-str_replace_all(form,ur.regex,"D.\\1")
+   #adjustment needed, b/c of how i created vars
+   #switch the L and the D around (D.L2.x becomes L2.D.x)
+   urform<-str_replace_all(urform,"(D\\.)(L([0-9]+)?\\.)","\\2\\1")
+   thisformula<-urform
+   if(asformula)
+     thisformula<-thisformula %>% as.formula
+   return(thisformula)
+ }

> #########################################################
> #########################################################
> 
> #SET UP ROBMODS LOOP
> 
> #models are named externally
> setwd(metadir); dir()
 [1] "01_po_qinfo.csv"               "01_po_varsdf.csv"             
 [3] "03_dind_cdf.csv"               "03_dind_ddmodsdf.csv"         
 [5] "03_dind_ddvarsdf.csv"          "03_dind_regvarsdf.csv"        
 [7] "03_dind_robdisplay_EDIT.csv"   "03_dind_robdisplay_EDITED.csv"
 [9] "03_dind_roborder_EDIT.csv"     "03_dind_roborder_EDITED.csv"  
[11] "03_dind_robustness.csv"       

> rawrobdf<-read.csv(
+   '03_dind_robustness.csv',
+   stringsAsFactors=F
+ )

> #we will only be running these mods
> #for imprt and officers
> dvs<-c(
+   "imprt_t_jur",
+   "officers_pcap"
+ )

> #those which are NA, are deprecated
> rawrobdf<-rawrobdf[!is.na(rawrobdf$order),]

> # #make a table, for output
> # rawrobdf$letter<-
> #   paste0(
> #     "(",LETTERS[1:nrow(rawrobdf)],")"
> #   )
> # setwd(outputdir)
> # write.csv(
> #   rawrobdf[,c("letter","propername")],
> #   "tab_robmods.csv",
> #   row.names=F
> # )
> 
> #to be used below
> getmodname<-function(mname) {
+   #return rawrobdf$propername for the row(s) whose mname
+   #equals the given name once its first "_clSE" is removed
+   key<-str_replace(mname,"_clSE","")
+   thisrow<-rawrobdf$mname==key
+   rawrobdf$propername[thisrow]
+ }

> getmodorder<-function(mname) {
+   #return rawrobdf$order for the row(s) whose mname
+   #equals the given name once its first "_clSE" is removed
+   key<-str_replace(mname,"_clSE","")
+   thisrow<-rawrobdf$mname==key
+   rawrobdf$order[thisrow]
+ }

> #to loop through
> robdf<-expand.grid(
+   mod=rawrobdf$mname,
+   dv=dvs,
+   stringsAsFactors=F
+ )

> #########################################################
> #########################################################
> 
> #LOOP THROUGH
> 
> #trim?
> #placeholder filter: all TRUE keeps every row of robdf;
> #edit tmp to run only a subset of the models
> tmp<-rep(TRUE,nrow(robdf))

> robdf<-robdf[tmp,]

> #seq_len() instead of 1:nrow(), which would give c(1,0) if robdf were empty
> robdf$seq<-seq_len(nrow(robdf))

> #LOOP!
> tmp.seq<-seq_len(nrow(robdf))

> #Estimate each robustness model. For row i of robdf: take the preferred
> #specification for that dv (formula from forms, sample from samps,
> #"within" estimator), modify it as robdf$mod[i] dictates, fit with plm,
> #and hand the fit to getlongrun2. Returns a list with one data.frame per
> #row of robdf, each carrying seq=i for merging back onto robdf.
> tmpoutput<-lapply(tmp.seq,function(i) {
+   
+   #i<-12
+   #get param
+   thisdv<-robdf$dv[i]
+   thismod<-robdf$mod[i]
+   #track progress
+   print("####")
+   print(
+     paste(
+       "Estimating model",
+       i,"of",max(tmp.seq)
+     )
+   )
+   print(thisdv)
+   print(thismod)
+   
+   #############
+   
+   #PREPARATORY
+   
+   #get sample and formula from pref
+   tmp<-finaldf$dv==thisdv & 
+     finaldf$pref
+   #identify sample w/ logicals, 
+   #which makes robustness process easier,
+   #since we are adding new vars etc.
+   thisdf<-beodfraw
+   this.samp<-paste0(
+     thisdf$state_alpha2,
+     thisdf$year
+   ) %in% paste0(
+     samps[[thisdv]]$state_alpha2,
+     samps[[thisdv]]$year
+   )
+   this.mname<-unique(finaldf$mname[tmp])
+   #forms[[...]] needs exactly one name; fail clearly otherwise
+   if(length(this.mname)!=1 || is.na(this.mname)) {
+     stop(
+       paste(
+         "expected exactly one preferred model for",
+         thisdv
+       )
+     )
+   }
+   thismodel.arg<-"within" #default pref.
+   thisform<-forms[[this.mname]]
+   
+   #############
+   
+   #which model?
+   #redefine samp/form as necessary
+   #and then re-estimate below
+   
+   #############
+   
+   #SET UP FORMULA/SAMPLE
+   
+   if(thismod=="pref") {
+     
+     #don't change anything
+     
+   } else if (thismod=="re") {
+     
+     #random effects instead of within
+     thismodel.arg<-"random"
+     
+   } else if (thismod=="pooled") {
+     
+     #pooled OLS instead of within
+     thismodel.arg<-"pooling"
+     
+   } else if (thismod=="5year") {
+     
+     #swap in beodf5 and use all of its rows
+     thisdf<-beodf5
+     this.samp<-rep(TRUE,nrow(beodf5))
+     
+   } else if (thismod=="spending") {
+     
+     #replace the dv (and its lags) with the spending vars,
+     #by editing the formula as text
+     oldform<-deparse(thisform) %>%
+       paste0(collapse="")
+     thisform<-str_replace_all(
+       oldform,
+       "incrt\\_t\\_jur|imprt\\_t\\_jur",
+       "lncorrections_pcap"
+     ) %>% str_replace_all(
+       "officers\\_pcap",
+       "lnpolicesp_pcap"
+     ) %>% as.formula
+     if(thisdv%in%c("incrt_t_jur","imprt_t_jur")) {
+       thisdv<-"lncorrections_pcap"
+     } else if(thisdv=="officers_pcap") {
+       thisdv<-"lnpolicesp_pcap"
+     }
+     
+   } else if (thismod=="divtrend") {
+     
+     #add year x division interaction
+     oldform<-deparse(thisform) %>%
+       paste0(collapse="")
+     thisform<-paste0(
+       oldform,
+       " + year:division"
+     ) %>% as.formula
+     
+   } else if (thismod=="regtrend") {
+     
+     #add year x region interaction
+     oldform<-deparse(thisform) %>%
+       paste0(collapse="")
+     thisform<-paste0(
+       oldform,
+       " + year:region"
+     ) %>% as.formula
+     
+   } else if (thismod=='urtransform') {
+     
+     #rewrite the formula w/ urtransform; 
+     #dv gets the D. prefix if it is one of urvars
+     thisform<-urtransform(thisform)
+     if(thisdv%in%urvars) {
+       thisdv<-paste0("D.",thisdv)
+     }
+     
+   } else {
+     
+     #unknown model name; stop() reports the message itself
+     stop(
+       paste(thismod,"not implemented.")
+     )
+     
+   }
+   
+   #############
+   
+   #ESTIMATION
+   
+   #get the df
+   tmpdf<-thisdf[this.samp,]
+   regvars<-all.vars(thisform)
+   regvars<-c("state_alpha2","year",regvars)
+   
+   #check that they're all present
+   tmp<-regvars%in%names(tmpdf)
+   if(sum(!tmp)>0) {
+     print(regvars[!tmp])
+     stop('missing vars')
+   }
+   
+   #keep complete cases, and only the model vars
+   prez<-complete.cases(tmpdf[,regvars])
+   tmpdf<-tmpdf[prez,regvars]
+   
+   #re-estimate
+   
+   #formula= spelled out (was form=, relying on partial matching)
+   m.tmp<-plm(
+     data=tmpdf,
+     formula=thisform,
+     model=thismodel.arg
+   )
+   vcov.tmp<-vcovHC(
+     m.tmp,
+     type="HC1",
+     cluster="group"
+   )
+   keyvar<-c("beopct_all")
+   keyvar.sd<-sdsdf$sd[sdsdf$var==keyvar]
+   if(str_detect(thismod,"ratio")) {
+     #ratio models: key var is beoratio, and its sd is
+     #the mean of the by-state sds of L.beoratio
+     keyvar<-"beoratio"
+     keyvar.sd<-tapply(
+       tmpdf$L.beoratio,
+       tmpdf$state_alpha2,
+       sd,na.rm=TRUE
+     ) %>% mean(na.rm=TRUE)
+   }
+   returndf<-getlongrun2(
+     m=m.tmp,
+     vcov=vcov.tmp,
+     dv=thisdv,
+     iv=keyvar,
+     ivsd=keyvar.sd
+   )
+   #add standardized
+   #(dvsd is the mean of the by-state sds of the dv)
+   dvsd<-tapply(
+     tmpdf[[thisdv]],
+     tmpdf$state_alpha2,
+     sd
+   ) %>% mean
+   returndf$musd<-returndf$mu/dvsd
+   returndf$musd.min<-returndf$mu.min/dvsd
+   returndf$musd.max<-returndf$mu.max/dvsd
+   #NOTE(review): iv is recorded as beopct_all even when 
+   #keyvar was switched to beoratio above; left as is - confirm
+   returndf$iv<-"beopct_all"
+   returndf$N<-nrow(m.tmp$model)
+   returndf$N.states<-length(unique(tmpdf$state_alpha2))
+   returndf$seq<-i
+   #return
+   return(returndf)
+   
+ })
[1] "####"
[1] "Estimating model 1 of 12"
[1] "imprt_t_jur"
[1] "pref"
[1] "####"
[1] "Estimating model 2 of 12"
[1] "imprt_t_jur"
[1] "5year"
[1] "####"
[1] "Estimating model 3 of 12"
[1] "imprt_t_jur"
[1] "spending"
[1] "####"
[1] "Estimating model 4 of 12"
[1] "imprt_t_jur"
[1] "divtrend"
[1] "####"
[1] "Estimating model 5 of 12"
[1] "imprt_t_jur"
[1] "regtrend"
[1] "####"
[1] "Estimating model 6 of 12"
[1] "imprt_t_jur"
[1] "urtransform"
[1] "####"
[1] "Estimating model 7 of 12"
[1] "officers_pcap"
[1] "pref"
[1] "####"
[1] "Estimating model 8 of 12"
[1] "officers_pcap"
[1] "5year"
[1] "####"
[1] "Estimating model 9 of 12"
[1] "officers_pcap"
[1] "spending"
[1] "####"
[1] "Estimating model 10 of 12"
[1] "officers_pcap"
[1] "divtrend"
[1] "####"
[1] "Estimating model 11 of 12"
[1] "officers_pcap"
[1] "regtrend"
[1] "####"
[1] "Estimating model 12 of 12"
[1] "officers_pcap"
[1] "urtransform"

> #make estsdf
> robestsdf<-rbind.fill(tmpoutput)

> intersect(
+   names(robdf),
+   names(robestsdf)
+ )
[1] "seq"

> robestsdf<-merge(
+   robdf,
+   robestsdf,
+   by="seq"
+ )

> #########################################################
> #########################################################
> 
> #FINALIZE/SAVE OUT
> 
> #remove anything that wasn't estimated
> tmp<-is.na(robestsdf$mu)

> missing<-unique(robestsdf$mod[tmp])

> print(missing)
character(0)

> robestsdf<-robestsdf[!tmp,]

> #get modname
> #robestsdf$modname<-sapply(robestsdf$mod,getmodname)
> robestsdf$mname<-paste0(
+   robestsdf$dv,".",robestsdf$mod
+ )

> #########################################################
> #########################################################
> 
> #save out
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> #save.image("robusts.RData")
> write.csv(
+   robestsdf,
+   "03_dind_robustnessdf.csv",
+   row.names=F
+ )

> #########################################################
> #########################################################
[1] "######"
[1] "Running:"
[1] "04_summarize.R"

> #########################################################
> #########################################################
> 
> #clear workspace
> rm(list=ls())

> #load packages
> require(stringr)

> require(plyr)

> require(dplyr)

> require(zoo)

> require(data.table)

> require(tidyr)

> require(rprojroot)

> #extras
> require(haven)

> require(readr)

> require(plm)

> #set dirs
> rootdir<-find_root(
+   criterion=has_file('_rapol.Rproj')
+ )

> codedir<-file.path(rootdir,"code")

> setwd(codedir); dir()
[1] "01_publicopinion" "02_voting"        "03_dind"          "dirs.R"          
[5] "runeverything.R" 

> source('dirs.R')

> #load helper functions
> setwd(dcodedir)

> source('beofunctions.R')

> source('beofunctions2.R')

> #########################################################
> #########################################################
> 
> #load all the info needed for output
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> #dd
> dddf<-read.csv(
+   '03_dind_resultsdf.csv',
+   stringsAsFactors=F
+ )

> dddf$approach<-"dd"

> #regs
> regdf<-read.csv(
+   '03_dind_regresultsdf.csv',
+   stringsAsFactors=F
+ )

> regdf$approach<-"reg"

> #robustness
> robsdf<-read.csv(
+   '03_dind_robustnessdf.csv',
+   stringsAsFactors=F
+ )

> robsdf$approach<-"reg"

> robsdf$pref<-F

> #identify the preferred ests in dd
> tmp<-dddf$dv%in%c(
+   "imprt_t_jur",
+   "officers_pcap"
+ )

> tmp<-tmp & 
+   dddf$spec%in%c("divtrend") &
+   dddf$method=="normal" &
+   dddf$sample=="full"

> dddf$pref<-F

> dddf$pref[tmp]<-T

> #add 'conventional' estimates
> tmpdf<-data.frame(
+   pref=T,
+   approach=c('expectation','expectation'),
+   dv=c("imprt_t_jur","officers_pcap"),
+   musd=c(-0.5,-0.5),
+   musd.min=c(-1,-1),
+   musd.max=c(0,0),
+   pval.class="at alpha=0.05"
+ )

> #add the 'racialthreat' expectation for welfbenefits
> tmpdf2<-data.frame(
+   pref=F,
+   approach=c('expectation'),
+   spec=c('racialthreat'),
+   dv=c("welfbenefits"),
+   musd=c(-0.5),
+   musd.min=c(-1),
+   musd.max=c(0),
+   pval.class="at alpha=0.05",
+   method='normal'
+ )

> #put them all together
> finaldf<-rbind.fill(
+   regdf,
+   robsdf,
+   dddf,
+   tmpdf,
+   tmpdf2
+ )

> #########################################################
> #########################################################
> 
> #plotting prelims
> require(ggplot2)

> require(ggthemes)

> require(extrafont)

> require(RColorBrewer)

> # #load fonts
> # loadfonts(quiet=T) #register w/ pdf
> # loadfonts(device = "win",quiet=T) #register w/ windows
> # #get ghostscript, for tex output
> # Sys.setenv(
> #   R_GSCMD = gsdir_full
> # )
> # #initialize graphlist
> # gs.list<-list()
> 
> #quick function to output dfs as csv
> output <- function(df,tmpname) {
+   #write df to outputdir as a csv, named after the figure it goes with
+   #  df: the data.frame to write
+   #  tmpname: file name; a trailing .pdf or .png becomes .csv,
+   #           any other name is used unchanged
+   #side effect: the working directory is left at outputdir
+   setwd(outputdir)
+   #both extensions are anchored at the end of the name, so that 
+   #a ".png" earlier in the name is never the part replaced;
+   #no match leaves tmpname untouched, so no str_detect guard is needed
+   tmpname<-str_replace(tmpname,"\\.(pdf|png)$",".csv")
+   write.csv(
+     df,
+     tmpname,
+     row.names=F
+   )
+ }

> #########################################################
> #########################################################
> 
> #add shape et al to finaldf
> #add pval info to shape of point
> #pval.shp is pval.class as a factor w/ a fixed level order;
> #any value outside these four levels becomes NA
> finaldf$pval.shp<-factor(
+   finaldf$pval.class,
+   levels=c(
+     "at alpha=0.01",
+     "at alpha=0.05",
+     "at alpha=0.10",
+     "not sig"
+   )
+ )

> #tmpshapes
> tmpshapes<-c(8,4,16,1)

> names(tmpshapes)<-levels(finaldf$pval.shp)

> shp.labels<-c(
+   bquote(alpha == 0.01),
+   bquote(alpha == 0.05),
+   bquote(alpha == 0.10)
+ )

> #get pval fill, for tile
> finaldf$pval.fill<-NA

> finaldf$pval.fill[finaldf$pval.class=="at alpha=0.01"]<-4

> finaldf$pval.fill[finaldf$pval.class=="at alpha=0.05"]<-3

> finaldf$pval.fill[finaldf$pval.class=="at alpha=0.10"]<-2

> finaldf$pval.fill[finaldf$pval.class=="not sig"]<-1

> negmu<-ifelse(finaldf$mu<0,-1,1)

> finaldf$pval.fill<-finaldf$pval.fill * negmu

> pval.labels<-c("at alpha=0.01","at alpha=0.05","at alpha=0.10","")

> tmplabels<-c(
+   paste0("- /",pval.labels),
+   paste0("+ /",rev(pval.labels))
+ )

> #assign levels,colors
> finaldf$pval.fill<-factor(
+   finaldf$pval.fill,
+   levels=c(-4,-3,-2,-1,1,2,3,4),
+   labels=tmplabels
+ )

> #for colors, consult brewer
> brewer.pal.info
         maxcolors category colorblind
BrBG            11      div       TRUE
PiYG            11      div       TRUE
PRGn            11      div       TRUE
PuOr            11      div       TRUE
RdBu            11      div       TRUE
RdGy            11      div      FALSE
RdYlBu          11      div       TRUE
RdYlGn          11      div      FALSE
Spectral        11      div      FALSE
Accent           8     qual      FALSE
Dark2            8     qual       TRUE
Paired          12     qual       TRUE
Pastel1          9     qual      FALSE
Pastel2          8     qual      FALSE
Set1             9     qual      FALSE
Set2             8     qual       TRUE
Set3            12     qual      FALSE
Blues            9      seq       TRUE
BuGn             9      seq       TRUE
BuPu             9      seq       TRUE
GnBu             9      seq       TRUE
Greens           9      seq       TRUE
Greys            9      seq       TRUE
Oranges          9      seq       TRUE
OrRd             9      seq       TRUE
PuBu             9      seq       TRUE
PuBuGn           9      seq       TRUE
PuRd             9      seq       TRUE
Purples          9      seq       TRUE
RdPu             9      seq       TRUE
Reds             9      seq       TRUE
YlGn             9      seq       TRUE
YlGnBu           9      seq       TRUE
YlOrBr           9      seq       TRUE
YlOrRd           9      seq       TRUE

> tmpcolors<-brewer.pal(8,"RdYlGn")

> names(tmpcolors)<-levels(finaldf$pval.fill)

> fill.labels<-c(
+   expression(paste(alpha==0.01,", ",beta<0)),
+   expression(paste(alpha==0.05,", ",beta<0)),
+   expression(paste(alpha==0.10,", ",beta<0)),
+   expression(paste(beta<0)),
+   expression(paste(beta>0)),
+   expression(paste(alpha==0.10,", ",beta>0)),
+   expression(paste(alpha==0.05,", ",beta>0)),
+   expression(paste(alpha==0.01,", ",beta>0))
+ )

> #########################################################
> #########################################################
> 
> #FIG 7 - PREFERRED ESTS
> 
> #get preferred ests
> 
> tmp<-finaldf$dv%in%c(
+   'imprt_t_jur',
+   'officers_pcap'
+ )

> tmp2<-(
+   finaldf$approach=='dd' & 
+     finaldf$pref
+ )

> # tmp3<-(
> #   finaldf$approach=="expectation"
> # )
> tmp<-tmp & (tmp2 )#| tmp3)

> plotdf<-finaldf[tmp,]

> #dv
> sapply(plotdf$dv,getvarorder) %>%
+   sort %>% names %>% unique
[1] "imprt_t_jur"   "officers_pcap"

> tmplevels<-plotdf$dv %>% unique

> tmplabels<-sapply(
+   tmplevels,
+   getvarname
+ )

> plotdf$dv<-factor(
+   plotdf$dv,
+   tmplevels,
+   tmplabels
+ )

> #approach
> plotdf$approach<-paste0(
+   plotdf$approach,"_",plotdf$spec
+ )

> tmplevels<-c(
+   "expectation_NA",
+   "dd_divtrend",
+   "dd_controls"
+ )

> tmplabels<-c(
+   "Conventional View",
+   "Estimated",
+   "Estimated (+ Controls)"
+ )

> plotdf$approach<-factor(
+   plotdf$approach,
+   tmplevels,
+   tmplabels
+ )

> g.tmp<-ggplot(
+   plotdf,
+   aes(
+     x=dv,
+     y=musd,
+     ymin=musd.min,
+     ymax=musd.max,
+     shape=pval.shp
+   )
+ ) + 
+   geom_point(
+     size=2
+   ) +
+   geom_errorbar(
+     size=0.4,
+     width=0.2
+   ) +
+   geom_hline(
+     yintercept=0,
+     linetype='dashed',
+     color='black'
+   ) +
+   scale_shape_manual(
+     name="",
+     values=tmpshapes,
+     labels=shp.labels,
+     drop=F
+   ) + 
+   scale_color_discrete(
+     name=""
+   ) +
+   ylab("\nImpact of Redistricting in SDs") +
+   xlab("") +
+   coord_flip() +
+   facet_wrap(
+     ~ approach,
+     ncol=1
+   ) + 
+   theme_bw()

> setwd(outputdir)

> tmpname<-"fig_dind_prefests.png"

> ggsave(
+   plot=g.tmp,
+   tmpname,
+   width=6,
+   height=2
+ )

> output(plotdf,tmpname)

> ggsave(
+   plot=g.tmp,
+   filename='CleggFig7.pdf',
+   width=6,
+   height=2
+ )

> #########################################################
> #########################################################
> 
> #FIG X - WELFARE BENEFITS
> 
> #also export welfbenefits
> tmp<-finaldf$dv=="welfbenefits" &
+   finaldf$spec%in%c(
+     "divtrend"#,
+     #"racialthreat"#,
+     #"controls"
+   ) &
+   finaldf$method=="normal" &
+   finaldf$sample=="full"

> plotdf<-finaldf[tmp,]

> plotdf$dv <- factor(
+   plotdf$dv,
+   levels='welfbenefits',
+   labels='Welfare Benefits'
+ )

> plotdf$spec <- factor(
+   plotdf$spec,
+   levels=c(
+     'racialthreat',
+     'divtrend',
+     'controls'
+   ) %>% rev,
+   labels=c(
+     'If Racial Threat',
+     'Estimated',
+     'Estimated (+ Controls)'
+   ) %>% rev
+ )

> g.tmp <- ggplot(
+   plotdf,
+   aes(
+     x=spec,
+     y=musd,
+     ymin=musd.min,
+     ymax=musd.max,
+     shape=pval.shp
+   )
+ ) + 
+   geom_point(
+     size=2
+   ) +
+   geom_errorbar(
+     size=0.4,
+     width=0.2
+   ) +
+   geom_hline(
+     yintercept=0,
+     linetype='dashed',
+     color='black'
+   ) +
+   scale_shape_manual(
+     name="",
+     values=tmpshapes,
+     labels=shp.labels,
+     drop=F
+   ) + 
+   scale_color_discrete(
+     name=""
+   ) +
+   ylab("\nImpact of Redistricting in SDs") +
+   xlab("") +
+   coord_flip() +
+   facet_wrap(
+     ~ dv,
+     ncol=1
+   ) +
+   theme_bw()

> setwd(outputdir)

> tmpname<-"fig_dind_welfare.png"

> ggsave(
+   plot=g.tmp,
+   tmpname,
+   width=8,
+   height=3
+ )

> output(plotdf,tmpname)

> plotdf
                  dv                             mname dvlag ivspec       iv     term
568 Welfare Benefits welfbenefits.divtrend.normal.full  <NA>   <NA> t.post.t t.post.t
          mu   mu.min   mu.max       se      pval        t     type    pval.class seq bic aic
568 20.28805 4.267176 36.30892 8.173915 0.0131801 2.482048 shortrun at alpha=0.05  NA  NA  NA
    r2 adjr2 sampname  N N.states range  pref prefbic     dvsd      musd  musd.min musd.max
568 NA    NA     <NA> NA       NA  <NA> FALSE      NA 35.25255 0.5755058 0.1210459 1.029966
    approach  mod      spec method sample      pval.shp        pval.fill
568       dd <NA> Estimated normal   full at alpha=0.05 + /at alpha=0.05

> #is welfare ever negative?
> tmp<-finaldf$dv=="welfbenefits" &
+   finaldf$method=="normal" &
+   finaldf$approach!='expectation'

> plotdf<-finaldf[tmp,]

> plotdf[plotdf$mu<0,]
 [1] dv         mname      dvlag      ivspec     iv         term       mu         mu.min    
 [9] mu.max     se         pval       t          type       pval.class seq        bic       
[17] aic        r2         adjr2      sampname   N          N.states   range      pref      
[25] prefbic    dvsd       musd       musd.min   musd.max   approach   mod        spec      
[33] method     sample     pval.shp   pval.fill 
<0 rows> (or 0-length row.names)

> #########################################################
> #########################################################
> 
> #FIG X - ROBUST ESTS
> 
> setwd(metadir); dir()
 [1] "01_po_qinfo.csv"               "01_po_varsdf.csv"             
 [3] "03_dind_cdf.csv"               "03_dind_ddmodsdf.csv"         
 [5] "03_dind_ddvarsdf.csv"          "03_dind_regvarsdf.csv"        
 [7] "03_dind_robdisplay_EDIT.csv"   "03_dind_robdisplay_EDITED.csv"
 [9] "03_dind_roborder_EDIT.csv"     "03_dind_roborder_EDITED.csv"  
[11] "03_dind_robustness.csv"       

> tmpdf<-read.csv(
+   '03_dind_robdisplay_EDITED.csv',
+   stringsAsFactors=F
+ )

> tmp<-tmpdf$facet!=""

> tmpdf<-tmpdf[tmp,]

> plotdf<-merge(
+   tmpdf,
+   finaldf,
+   all.x=T
+ )

> #restrict to desired coefs
> tmp<-plotdf$iv%in%c(
+   "beopct_all",
+   "t.post.t"
+ ) &
+   (
+     is.na(plotdf$type) 
+     | plotdf$type!="longrun"
+   )

> plotdf<-plotdf[tmp,]

> #there should be one estimate per model
> #count distinct musd values per model name
> tmptab<-tapply(
+   plotdf$musd,
+   plotdf$mname,
+   function(x) length(unique(x))
+ )

> #say which models are duplicated, rather than a bare stop();
> #any() also copes with an empty table or NA counts
> if(any(tmptab>1,na.rm=TRUE)) {
+   stop(
+     paste(
+       "more than one estimate for:",
+       paste(names(tmptab)[which(tmptab>1)],collapse=", ")
+     )
+   )
+ }

> table(plotdf$x)

   imprt officers  welfare 
      20       20       12 

> # manually order the mods
> # tmpdf<-unique(plotdf[,c("facet","y")])
> # row.names(tmpdf)<-NULL
> # tmpdf<-unique(tmpdf)
> # tmpdf<-tmpdf[order(tmpdf$facet,tmpdf$y),]
> # write.csv(
> #   tmpdf,
> #   "03_dind_roborder_EDIT.csv",
> #   row.names=F
> # )
> 
> tmpdf<-read.csv(
+   '03_dind_roborder_EDITED.csv',
+   stringsAsFactors=F
+ )

> plotdf<-merge(
+   plotdf,
+   tmpdf
+ )

> #factors
> tmplevels<-c(
+   "dd",
+   "reg"
+ )

> tmplabels<-c(
+   "D-in-D",
+   "ADL"
+ )

> plotdf$facet<-factor(
+   plotdf$facet,
+   tmplevels,
+   tmplabels
+ )

> tmplevels<-c(
+   "imprt",
+   "officers",
+   "welfare"
+ )

> tmplabels<-c(
+   "Incarceration",
+   "Police",
+   "Welfare Benefits"
+ )

> plotdf$x<-factor(
+   plotdf$x,
+   tmplevels,
+   tmplabels
+ )

> #if ADL, add a space, so these can be unique labels
> plotdf$y[plotdf$facet=='ADL']<-paste0(" ",plotdf$y[plotdf$facet=='ADL'])

> tmporder<-order(plotdf$facet,plotdf$order)

> tmplevels<-plotdf$y[tmporder]

> plotdf$y<-factor(
+   plotdf$y,
+   rev(tmplevels),
+   rev(tmplevels)
+ )

> #add text
> plotdf$text<-formatC(
+   plotdf$musd,
+   digits=2,
+   replace.zero=T,
+   zero.print="0",
+   format='f',
+   preserve.width=T
+ )

> g.tmp<-ggplot(
+   #after revisions we choose only to show DD results
+   plotdf[plotdf$facet=='D-in-D',],
+   aes(
+     x=x,
+     y=y,
+     fill=pval.fill,
+     label=text
+   )
+ ) +
+   geom_tile(
+     color='black',
+     width=1
+   ) +
+   geom_text(
+     color='black'
+   ) +
+   facet_wrap(
+     ~ facet,
+     scales='free'
+   ) +
+   scale_fill_manual(
+     name="",
+     values=tmpcolors,
+     labels=fill.labels,
+     drop=F
+   ) +
+   xlab("") +
+   ylab("") +
+   theme_bw() +
+   theme(
+     panel.grid = element_blank(),
+     axis.line= element_blank(),
+     panel.border=element_blank(),
+     axis.ticks = element_blank()
+   )

> setwd(outputdir)

> tmpname<-"fig_dind_robests.png"

> ggsave(
+   plot=g.tmp,
+   tmpname,
+   width=8,
+   height=6
+ )

> output(plotdf,tmpname)

> plotdf[plotdf$mu<0 & plotdf$approach=='dd',]
    facet                 y                                mname      x              dv dvlag
18 D-in-D First Differences       officers_pcap.diff.normal.full Police   officers_pcap  <NA>
35 D-in-D          Spending lnpolicesp_pcap.divtrend.normal.full Police lnpolicesp_pcap  <NA>
   ivspec       iv     term          mu    mu.min   mu.max         se      pval          t
18   <NA> t.post.t t.post.t -0.47485493 -4.349326 3.399616 1.97677100 0.8102097 -0.2402175
35   <NA> t.post.t t.post.t -0.01978549 -0.193812 0.154241 0.08878902 0.8236940 -0.2228372
       type pval.class seq bic aic r2 adjr2 sampname  N N.states range  pref prefbic
18 shortrun    not sig  NA  NA  NA NA    NA     <NA> NA       NA  <NA> FALSE      NA
35 shortrun    not sig  NA  NA  NA NA    NA     <NA> NA       NA  <NA> FALSE      NA
         dvsd        musd   musd.min   musd.max approach  mod     spec method sample pval.shp
18 37.3992999 -0.01269689 -0.1162943 0.09090053       dd <NA>     diff normal   full  not sig
35  0.3545722 -0.05580103 -0.5466079 0.43500587       dd <NA> divtrend normal   full  not sig
   pval.fill order  text
18       - /     9 -0.01
35       - /    12 -0.06

> #########################################################
> #########################################################
> 
> #FIG - ILLUSTRATE THE RISE IN BLACK REPRESENTATION
> 
> setwd(filesdir); dir()
 [1] "01po_dataframe.csv"              "01po_grouped.RDS"               
 [3] "01po_modslist.RDS"               "01po_modslist_OLD.RDS"          
 [5] "01po_modslist_RandR.RDS"         "01po_predictions.RDS"           
 [7] "01po_predictions_OLD.RDS"        "01po_q_avgs.csv"                
 [9] "01po_q_diffs.csv"                "01po_regmods_info.csv"          
[11] "02_voting_cbcdf.csv"             "02_voting_fulldf.csv"           
[13] "02_voting_fulldf_classified.csv" "02_voting_membersdf.csv"        
[15] "02_voting_votesdf.csv"           "03_dind_beodf_dd.csv"           
[17] "03_dind_regresultsdf.csv"        "03_dind_regstuff.RDS"           
[19] "03_dind_resultsdf.csv"           "03_dind_robustnessdf.csv"       
[21] "tab_po_questions_EDIT.tex"      

> tmpdf<-read.csv(
+   '03_dind_beodf_dd.csv',
+   stringsAsFactors=F
+ )

> tmpdf<-data.table(tmpdf)

> plotdf<-tmpdf[
+   year%in%1980:2000 #&
+     # state_alpha2%in%c(
+     #   'MO', 
+     #   'AR', 
+     #   'LA', 
+     #   'OK', 
+     #   'TX', 
+     #   'AL', 
+     #   'KY', 
+     #   'MS', 
+     #   'TN', 
+     #   'DE', 
+     #   'FL', 
+     #   'GA', 
+     #   'MD', 
+     #   'NC', 
+     #   'SC', 
+     #   'VA', 
+     #   'WV', 
+     #   'AZ', 
+     #   'KS', 
+     #   'NM'
+     # )
+   ,
+   .(beopct=mean(beopct_all)),
+   by=c("year","t")
+ ]

> tmplevels<-c(
+   0,1
+ )

> tmplabels<-c(
+   "Not Redistricted",
+   "Redistricted"
+ )

> plotdf$t<-factor(
+   plotdf$t,
+   tmplevels,
+   tmplabels
+ )

> tmpcolors<-c(
+   'blue','red'
+ )

> names(tmpcolors)<-
+   levels(plotdf$t)

> g.tmp<-ggplot(
+   plotdf,
+   aes(
+     x=year,
+     y=beopct,
+     group=t,
+     color=t
+   )
+ ) +
+   geom_line(
+     size=1
+   ) + 
+   geom_vline(
+     xintercept=1990,
+     linetype='dashed',
+     color='black'
+   ) +
+   scale_color_manual(
+     name="",
+     values=tmpcolors
+   ) +
+   xlab("") + 
+   ylab("Black Representatives (%)\n") +
+   theme_bw() +
+   theme(
+     legend.direction='horizontal',
+     legend.position='bottom'
+   )

> tmpname<-paste0("fig_dind_beoshock.png")

> setwd(outputdir)

> ggsave(
+   plot=g.tmp,
+   filename=tmpname,
+   width=6,
+   height=6
+ )

> ## close sink + file
> sink(type = "output")
